{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "31f3e96f",
   "metadata": {},
   "source": [
    "# Use AlphaZero to Play Tic-Tac-Toe\n",
    "\n",
    "This is the TensorFlow version of the implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ac9ccbcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections\n",
    "import math\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import losses\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import regularizers\n",
    "\n",
    "import boardgame2\n",
    "from boardgame2 import BLACK, WHITE\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cbdde5a3",
   "metadata": {},
   "source": [
    "## Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9eaa1107",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:11:12 [INFO] allow_pass: True\n",
      "00:11:12 [INFO] illegal_equivalent_action: [-1  0]\n",
      "00:11:12 [INFO] render_characters: {0: '+', 1: 'o', -1: 'x'}\n",
      "00:11:12 [INFO] board: [[0. 0. 0.]\n",
      " [0. 0. 0.]\n",
      " [0. 0. 0.]]\n",
      "00:11:12 [INFO] observation_space: Tuple(Box(-1, 1, (3, 3), int8), Box(-1, 1, (), int8))\n",
      "00:11:12 [INFO] action_space: Box(-1, 2, (2,), int8)\n",
      "00:11:12 [INFO] target_length: 3\n",
      "00:11:12 [INFO] spec: EnvSpec(TicTacToe-v0)\n",
      "00:11:12 [INFO] id: TicTacToe-v0\n",
      "00:11:12 [INFO] entry_point: boardgame2:KInARowEnv\n",
      "00:11:12 [INFO] reward_threshold: None\n",
      "00:11:12 [INFO] nondeterministic: False\n",
      "00:11:12 [INFO] max_episode_steps: None\n",
      "00:11:12 [INFO] _kwargs: {'board_shape': 3, 'target_length': 3}\n",
      "00:11:12 [INFO] _env_name: TicTacToe\n"
     ]
    }
   ],
   "source": [
    "# Create the seeded environment, then log every attribute of the\n",
    "# environment followed by every attribute of its spec.\n",
    "env = gym.make('TicTacToe-v0')\n",
    "env.seed(0)\n",
    "for attributes in (vars(env), vars(env.spec)):\n",
    "    for key, value in attributes.items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f268073",
   "metadata": {},
   "source": [
    "## Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "18e57c1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroReplayer:\n",
    "    \"\"\"Store of self-play records used to train the network.\n",
    "\n",
    "    Every row keeps the columns listed in `fields`: the player to move,\n",
    "    the board, the move distribution and the winner.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.fields = ['player', 'board', 'prob', 'winner']\n",
    "        self.memory = pd.DataFrame(columns=self.fields)\n",
    "\n",
    "    def store(self, df):\n",
    "        \"\"\"Append the rows of `df`, which must hold every column in `fields`.\"\"\"\n",
    "        records = df[self.fields]\n",
    "        if self.memory.empty:\n",
    "            # Concatenating onto an empty frame is deprecated in recent\n",
    "            # pandas (dtype inference), so adopt the first batch directly.\n",
    "            self.memory = records.reset_index(drop=True)\n",
    "        else:\n",
    "            self.memory = pd.concat([self.memory, records],\n",
    "                    ignore_index=True)\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Draw `size` rows uniformly at random, with replacement.\n",
    "\n",
    "        Returns a generator that yields one stacked array per field, in the\n",
    "        order of `fields`. Raises ValueError when nothing has been stored.\n",
    "        \"\"\"\n",
    "        if self.memory.empty:\n",
    "            raise ValueError('cannot sample from an empty replayer')\n",
    "        indices = np.random.choice(self.memory.shape[0], size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5a103efb",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroNet(keras.Model):\n",
    "    \"\"\"Policy-value network: a shared convolutional trunk feeding two heads.\n",
    "\n",
    "    Calling the model on a batch of boards returns a pair: move\n",
    "    probabilities shaped like the board, and one tanh value per board.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_shape, regularizer=regularizers.l2(1e-4)):\n",
    "        super().__init__()\n",
    "\n",
    "        def conv(filters):\n",
    "            # every convolution shares kernel size, padding and regularizer\n",
    "            return layers.Conv2D(filters, kernel_size=3, padding='same',\n",
    "                    kernel_regularizer=regularizer,\n",
    "                    bias_regularizer=regularizer)\n",
    "\n",
    "        # shared trunk: add a channel axis, then one conv + batch-norm + ReLU\n",
    "        # stage followed by two conv + batch-norm stages chained in call()\n",
    "        self.input_net = keras.Sequential([\n",
    "                layers.Reshape(input_shape + (1,)),\n",
    "                conv(256), layers.BatchNormalization(), layers.ReLU()])\n",
    "        self.residual_nets = [\n",
    "                keras.Sequential([conv(256), layers.BatchNormalization()])\n",
    "                for _ in range(2)]\n",
    "\n",
    "        # policy head: softmax over all cells, reshaped back to the board\n",
    "        self.prob_net = keras.Sequential([\n",
    "                conv(256), layers.BatchNormalization(), layers.ReLU(),\n",
    "                conv(1), layers.Flatten(), layers.Softmax(),\n",
    "                layers.Reshape(input_shape)])\n",
    "\n",
    "        # value head: a single tanh unit\n",
    "        self.value_net = keras.Sequential([\n",
    "                conv(1), layers.BatchNormalization(), layers.ReLU(),\n",
    "                layers.Flatten(),\n",
    "                layers.Dense(1, activation=nn.tanh,\n",
    "                        kernel_regularizer=regularizer,\n",
    "                        bias_regularizer=regularizer)])\n",
    "\n",
    "    def call(self, board_tensor):\n",
    "        \"\"\"Return (probability tensor, value tensor) for a batch of boards.\"\"\"\n",
    "        features = self.input_net(board_tensor)\n",
    "        last_index = len(self.residual_nets) - 1\n",
    "        for index, stage in enumerate(self.residual_nets):\n",
    "            stage_output = stage(features)\n",
    "            if index == last_index:\n",
    "                # only the final stage adds its own input back before ReLU\n",
    "                stage_output = stage_output + features\n",
    "            features = nn.relu(stage_output)\n",
    "\n",
    "        # both heads read the same trunk features\n",
    "        return self.prob_net(features), self.value_net(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1a8e2759",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AlphaZeroAgent:\n",
    "    \"\"\"AlphaZero agent: Monte Carlo tree search guided by a policy-value net.\n",
    "\n",
    "    The search and the network always see the canonical board\n",
    "    (`player * board`), i.e. the position from the side of the player to move.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env, sim_count=200, batch_size=64,\n",
    "            replay_threshold=1000, learn_times=2):\n",
    "        \"\"\"Create the agent.\n",
    "\n",
    "        env: environment providing `board`, `get_winner`, `get_valid` and\n",
    "            `next_step`.\n",
    "        sim_count: root visits to accumulate before an action is sampled.\n",
    "        batch_size: records drawn from the replayer for each update.\n",
    "        replay_threshold: stored records required before the net is trained.\n",
    "        learn_times: updates run each time the threshold is reached.\n",
    "        \"\"\"\n",
    "        if sim_count < 1:\n",
    "            raise ValueError('sim_count must be at least 1')\n",
    "        self.env = env\n",
    "        self.sim_count = sim_count\n",
    "        self.batch_size = batch_size\n",
    "        self.replay_threshold = replay_threshold\n",
    "        self.learn_times = learn_times\n",
    "\n",
    "        # defined up front so step() and close() work even before reset()\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        self.replayer = AlphaZeroReplayer()\n",
    "\n",
    "        self.board = np.zeros_like(env.board)\n",
    "        self.net = self.build_net()\n",
    "\n",
    "        self.reset_mcts()\n",
    "\n",
    "    def build_net(self, learning_rate=0.001):\n",
    "        \"\"\"Build and compile the policy-value network.\"\"\"\n",
    "        net = AlphaZeroNet(input_shape=self.board.shape)\n",
    "\n",
    "        def categorical_crossentropy_2d(y_true, y_pred):\n",
    "            # flatten the board axes so each row is one distribution\n",
    "            labels = tf.reshape(y_true, [-1, self.board.size])\n",
    "            preds = tf.reshape(y_pred, [-1, self.board.size])\n",
    "            return losses.categorical_crossentropy(labels, preds)\n",
    "\n",
    "        loss = [categorical_crossentropy_2d, losses.MSE]\n",
    "        optimizer = optimizers.Adam(learning_rate)\n",
    "        net.compile(loss=loss, optimizer=optimizer)\n",
    "        return net\n",
    "\n",
    "    def reset_mcts(self):\n",
    "        \"\"\"Discard all search statistics; every table is keyed by board string.\"\"\"\n",
    "        def zero_board_factory(): # for construct default_dict\n",
    "            return np.zeros_like(self.board, dtype=float)\n",
    "        self.q = collections.defaultdict(zero_board_factory)\n",
    "            # q estimates: board -> board\n",
    "        self.count = collections.defaultdict(zero_board_factory)\n",
    "            # q count visitation: board -> board\n",
    "        self.policy = {} # policy: board -> board\n",
    "        self.valid = {} # valid position: board -> board\n",
    "        self.winner = {} # winner: board -> None or int\n",
    "\n",
    "    def reset(self, mode):\n",
    "        \"\"\"Start an episode; in 'train' mode a fresh trajectory is recorded.\"\"\"\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, winner, _):\n",
    "        \"\"\"Search from `observation` and sample an action from the visit counts.\n",
    "\n",
    "        observation: pair (board, player).\n",
    "        winner: value recorded with this step when in 'train' mode.\n",
    "        Returns the chosen board location as an index tuple.\n",
    "        Raises ValueError if the position already has a winner.\n",
    "        \"\"\"\n",
    "        board, player = observation\n",
    "        canonical_board = player * board\n",
    "        s = boardgame2.strfboard(canonical_board)\n",
    "        while self.count[s].sum() < self.sim_count:\n",
    "            self.search(canonical_board, prior_noise=True)\n",
    "            if self.winner[s] is not None:\n",
    "                # a finished position never gains visits, so without this\n",
    "                # check the loop would never terminate\n",
    "                raise ValueError('step() called on a finished game')\n",
    "        prob = self.count[s] / self.count[s].sum()\n",
    "\n",
    "        # sample\n",
    "        location_index = np.random.choice(prob.size, p=prob.reshape(-1))\n",
    "        action = np.unravel_index(location_index, prob.shape)\n",
    "\n",
    "        if self.mode == 'train':\n",
    "            # flat layout: four consecutive entries per step\n",
    "            self.trajectory += [player, board, prob, winner]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"End an episode; in 'train' mode store it and train when enough data.\"\"\"\n",
    "        if self.mode != 'train':\n",
    "            return\n",
    "        self.save_trajectory_to_replayer()\n",
    "        if len(self.replayer.memory) >= self.replay_threshold:\n",
    "            for _ in range(self.learn_times): # learn multiple times\n",
    "                self.learn()\n",
    "            # reset replayer and search tables after the agent changes itself\n",
    "            self.replayer = AlphaZeroReplayer()\n",
    "            self.reset_mcts()\n",
    "\n",
    "    def save_trajectory_to_replayer(self):\n",
    "        \"\"\"Move the recorded episode into the replayer.\n",
    "\n",
    "        The winner stored with the last step is copied to every row.\n",
    "        NOTE(review): this assumes the episode loop passes the final result\n",
    "        to the last step() call -- confirm against the caller.\n",
    "        \"\"\"\n",
    "        if not self.trajectory:\n",
    "            return # nothing was recorded\n",
    "        df = pd.DataFrame(\n",
    "                np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['player', 'board', 'prob', 'winner'], dtype=object)\n",
    "        winner = self.trajectory[-1]\n",
    "        df['winner'] = winner\n",
    "        self.replayer.store(df)\n",
    "\n",
    "    def search(self, board, prior_noise=False): # MCTS\n",
    "        \"\"\"Run one simulation from canonical `board` and return its value.\n",
    "\n",
    "        The value is a plain number seen from the player to move on `board`.\n",
    "        Normalized gamma noise is mixed into the prior only for the node this\n",
    "        call starts from; the recursive calls run without noise.\n",
    "        \"\"\"\n",
    "        s = boardgame2.strfboard(board)\n",
    "\n",
    "        if s not in self.winner:\n",
    "            self.winner[s] = self.env.get_winner((board, BLACK))\n",
    "        if self.winner[s] is not None: # if there is a winner\n",
    "            return self.winner[s]\n",
    "\n",
    "        if s not in self.policy: # leaf that has not calculate the policy\n",
    "            boards = board[np.newaxis].astype(float)\n",
    "            # verbose=0: no progress bar for every single-board prediction\n",
    "            pis, vs = self.net.predict(boards, verbose=0)\n",
    "            # take the value as a scalar; a size-1 array written into a cell\n",
    "            # of q is a deprecated conversion in recent NumPy\n",
    "            pi, v = pis[0], float(vs[0, 0])\n",
    "            valid = self.env.get_valid((board, BLACK))\n",
    "            masked_pi = pi * valid\n",
    "            total_masked_pi = np.sum(masked_pi)\n",
    "            if total_masked_pi <= 0:\n",
    "                # all valid actions do not have probabilities. rarely occur\n",
    "                masked_pi = valid # workaround\n",
    "                total_masked_pi = np.sum(masked_pi)\n",
    "            self.policy[s] = masked_pi / total_masked_pi\n",
    "            self.valid[s] = valid\n",
    "            return v\n",
    "\n",
    "        # calculate PUCT\n",
    "        count_sum = self.count[s].sum()\n",
    "        c_init = 1.25\n",
    "        c_base = 19652.\n",
    "        coef = ((c_init + np.log1p((1 + count_sum) / c_base))\n",
    "                * math.sqrt(count_sum) / (1. + self.count[s]))\n",
    "        prior = self.policy[s]\n",
    "        if prior_noise:\n",
    "            alpha = 1. / self.valid[s].sum()\n",
    "            noise = np.random.gamma(alpha, 1., board.shape)\n",
    "            noise *= self.valid[s]\n",
    "            noise_sum = noise.sum()\n",
    "            if noise_sum > 0: # skip the mix if every sample underflowed to 0\n",
    "                prior_exploration_fraction = 0.25\n",
    "                prior = ((1. - prior_exploration_fraction) * prior\n",
    "                        + prior_exploration_fraction * (noise / noise_sum))\n",
    "        ub = np.where(self.valid[s], self.q[s] + coef * prior, np.nan)\n",
    "        location_index = np.nanargmax(ub)\n",
    "        location = np.unravel_index(location_index, board.shape)\n",
    "\n",
    "        (next_board, next_player), _, _, _ = self.env.next_step(\n",
    "                (board, BLACK), np.array(location))\n",
    "        next_canonical_board = next_player * next_board\n",
    "        next_v = self.search(next_canonical_board) # recursive\n",
    "        v = next_player * next_v\n",
    "\n",
    "        # running mean of the values backed up through this move\n",
    "        self.count[s][location] += 1\n",
    "        self.q[s][location] += ((v - self.q[s][location])\n",
    "                / self.count[s][location])\n",
    "        return v\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Fit the network on one batch sampled from the replayer.\"\"\"\n",
    "        players, boards, probs, winners = self.replayer.sample(\n",
    "                self.batch_size)\n",
    "        canonical_boards = (players[:, np.newaxis, np.newaxis] * boards).astype(\n",
    "                float)\n",
    "        # winner from the point of view of the player who was to move\n",
    "        vs = (players * winners)[:, np.newaxis].astype(float)\n",
    "        self.net.fit(canonical_boards, [probs, vs], verbose=0)\n",
    "\n",
    "\n",
    "agent = AlphaZeroAgent(env=env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d3486aa0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:12:21 [INFO] train episode 0: winner = 1, steps = 6\n",
      "00:13:13 [INFO] train episode 1: winner = 1, steps = 6\n",
      "00:13:47 [INFO] train episode 2: winner = 1, steps = 6\n",
      "00:14:02 [INFO] train episode 3: winner = 0, steps = 8\n",
      "00:14:45 [INFO] train episode 4: winner = 0, steps = 8\n",
      "00:14:49 [INFO] train episode 5: winner = -1, steps = 5\n",
      "00:15:26 [INFO] train episode 6: winner = 1, steps = 6\n",
      "00:15:38 [INFO] train episode 7: winner = 0, steps = 8\n",
      "00:16:11 [INFO] train episode 8: winner = 0, steps = 8\n",
      "00:16:30 [INFO] train episode 9: winner = 1, steps = 8\n",
      "00:16:50 [INFO] train episode 10: winner = 1, steps = 6\n",
      "00:16:56 [INFO] train episode 11: winner = 1, steps = 4\n",
      "00:16:59 [INFO] train episode 12: winner = 1, steps = 6\n",
      "00:17:20 [INFO] train episode 13: winner = 0, steps = 8\n",
      "00:17:23 [INFO] train episode 14: winner = 1, steps = 4\n",
      "00:17:28 [INFO] train episode 15: winner = 1, steps = 6\n",
      "00:17:30 [INFO] train episode 16: winner = 1, steps = 6\n",
      "00:17:40 [INFO] train episode 17: winner = 1, steps = 4\n",
      "00:17:41 [INFO] train episode 18: winner = 1, steps = 4\n",
      "00:17:43 [INFO] train episode 19: winner = 1, steps = 4\n",
      "00:17:48 [INFO] train episode 20: winner = 1, steps = 6\n",
      "00:17:55 [INFO] train episode 21: winner = 1, steps = 4\n",
      "00:18:05 [INFO] train episode 22: winner = 0, steps = 8\n",
      "00:18:06 [INFO] train episode 23: winner = 1, steps = 4\n",
      "00:18:10 [INFO] train episode 24: winner = 1, steps = 6\n",
      "00:18:27 [INFO] train episode 25: winner = 0, steps = 8\n",
      "00:18:29 [INFO] train episode 26: winner = -1, steps = 7\n",
      "00:18:32 [INFO] train episode 27: winner = 1, steps = 4\n",
      "00:18:45 [INFO] train episode 28: winner = 1, steps = 6\n",
      "00:18:55 [INFO] train episode 29: winner = -1, steps = 7\n",
      "00:18:57 [INFO] train episode 30: winner = 1, steps = 6\n",
      "00:19:04 [INFO] train episode 31: winner = 1, steps = 6\n",
      "00:19:29 [INFO] train episode 32: winner = 1, steps = 6\n",
      "00:19:35 [INFO] train episode 33: winner = 0, steps = 8\n",
      "00:19:37 [INFO] train episode 34: winner = 0, steps = 8\n",
      "00:19:44 [INFO] train episode 35: winner = 1, steps = 6\n",
      "00:19:47 [INFO] train episode 36: winner = -1, steps = 5\n",
      "00:19:54 [INFO] train episode 37: winner = 0, steps = 8\n",
      "00:19:56 [INFO] train episode 38: winner = 1, steps = 6\n",
      "00:19:59 [INFO] train episode 39: winner = 1, steps = 6\n",
      "00:20:03 [INFO] train episode 40: winner = 0, steps = 8\n",
      "00:20:04 [INFO] train episode 41: winner = 1, steps = 4\n",
      "00:20:10 [INFO] train episode 42: winner = 1, steps = 6\n",
      "00:20:14 [INFO] train episode 43: winner = 1, steps = 6\n",
      "00:20:15 [INFO] train episode 44: winner = -1, steps = 7\n",
      "00:20:17 [INFO] train episode 45: winner = -1, steps = 5\n",
      "00:20:18 [INFO] train episode 46: winner = 1, steps = 6\n",
      "00:20:26 [INFO] train episode 47: winner = 1, steps = 6\n",
      "00:20:30 [INFO] train episode 48: winner = 1, steps = 6\n",
      "00:20:30 [INFO] train episode 49: winner = 1, steps = 6\n",
      "00:20:30 [INFO] train episode 50: winner = 1, steps = 4\n",
      "00:20:33 [INFO] train episode 51: winner = 1, steps = 6\n",
      "00:20:34 [INFO] train episode 52: winner = 1, steps = 4\n",
      "00:20:42 [INFO] train episode 53: winner = 1, steps = 6\n",
      "00:20:48 [INFO] train episode 54: winner = -1, steps = 7\n",
      "00:20:49 [INFO] train episode 55: winner = 1, steps = 6\n",
      "00:20:55 [INFO] train episode 56: winner = 0, steps = 8\n",
      "00:21:01 [INFO] train episode 57: winner = 1, steps = 6\n",
      "00:21:02 [INFO] train episode 58: winner = 1, steps = 4\n",
      "00:21:08 [INFO] train episode 59: winner = 0, steps = 8\n",
      "00:21:15 [INFO] train episode 60: winner = -1, steps = 5\n",
      "00:21:17 [INFO] train episode 61: winner = 1, steps = 6\n",
      "00:21:19 [INFO] train episode 62: winner = 1, steps = 4\n",
      "00:21:22 [INFO] train episode 63: winner = 0, steps = 8\n",
      "00:21:25 [INFO] train episode 64: winner = 1, steps = 6\n",
      "00:21:28 [INFO] train episode 65: winner = 0, steps = 8\n",
      "00:21:30 [INFO] train episode 66: winner = 0, steps = 8\n",
      "00:21:31 [INFO] train episode 67: winner = 1, steps = 4\n",
      "00:21:32 [INFO] train episode 68: winner = 1, steps = 6\n",
      "00:21:32 [INFO] train episode 69: winner = -1, steps = 5\n",
      "00:21:34 [INFO] train episode 70: winner = -1, steps = 5\n",
      "00:21:37 [INFO] train episode 71: winner = 0, steps = 8\n",
      "00:21:40 [INFO] train episode 72: winner = 1, steps = 6\n",
      "00:21:42 [INFO] train episode 73: winner = 1, steps = 4\n",
      "00:21:42 [INFO] train episode 74: winner = 1, steps = 4\n",
      "00:21:44 [INFO] train episode 75: winner = -1, steps = 7\n",
      "00:21:45 [INFO] train episode 76: winner = 1, steps = 8\n",
      "00:21:45 [INFO] train episode 77: winner = 1, steps = 4\n",
      "00:21:46 [INFO] train episode 78: winner = 1, steps = 6\n",
      "00:21:47 [INFO] train episode 79: winner = 1, steps = 6\n",
      "00:21:48 [INFO] train episode 80: winner = 0, steps = 8\n",
      "00:21:51 [INFO] train episode 81: winner = -1, steps = 7\n",
      "00:21:53 [INFO] train episode 82: winner = 0, steps = 8\n",
      "00:21:53 [INFO] train episode 83: winner = 1, steps = 4\n",
      "00:21:53 [INFO] train episode 84: winner = 1, steps = 4\n",
      "00:22:01 [INFO] train episode 85: winner = 0, steps = 8\n",
      "00:22:01 [INFO] train episode 86: winner = 1, steps = 4\n",
      "00:22:04 [INFO] train episode 87: winner = 1, steps = 6\n",
      "00:22:06 [INFO] train episode 88: winner = 0, steps = 8\n",
      "00:22:08 [INFO] train episode 89: winner = 1, steps = 6\n",
      "00:22:12 [INFO] train episode 90: winner = 1, steps = 6\n",
      "00:22:12 [INFO] train episode 91: winner = 0, steps = 8\n",
      "00:22:14 [INFO] train episode 92: winner = 1, steps = 6\n",
      "00:22:14 [INFO] train episode 93: winner = 0, steps = 8\n",
      "00:22:15 [INFO] train episode 94: winner = 1, steps = 6\n",
      "00:22:15 [INFO] train episode 95: winner = 0, steps = 8\n",
      "00:22:18 [INFO] train episode 96: winner = 0, steps = 8\n",
      "00:22:19 [INFO] train episode 97: winner = 1, steps = 6\n",
      "00:22:21 [INFO] train episode 98: winner = -1, steps = 5\n",
      "00:22:21 [INFO] train episode 99: winner = 1, steps = 4\n",
      "00:22:22 [INFO] train episode 100: winner = 1, steps = 4\n",
      "00:22:22 [INFO] train episode 101: winner = 0, steps = 8\n",
      "00:22:23 [INFO] train episode 102: winner = 0, steps = 8\n",
      "00:22:24 [INFO] train episode 103: winner = 1, steps = 4\n",
      "00:22:24 [INFO] train episode 104: winner = 0, steps = 8\n",
      "00:22:27 [INFO] train episode 105: winner = -1, steps = 7\n",
      "00:22:27 [INFO] train episode 106: winner = 1, steps = 6\n",
      "00:22:28 [INFO] train episode 107: winner = 1, steps = 6\n",
      "00:22:29 [INFO] train episode 108: winner = -1, steps = 5\n",
      "00:22:31 [INFO] train episode 109: winner = -1, steps = 5\n",
      "00:22:32 [INFO] train episode 110: winner = 0, steps = 8\n",
      "00:22:32 [INFO] train episode 111: winner = 1, steps = 6\n",
      "00:22:33 [INFO] train episode 112: winner = 0, steps = 8\n",
      "00:22:37 [INFO] train episode 113: winner = 0, steps = 8\n",
      "00:22:39 [INFO] train episode 114: winner = 1, steps = 4\n",
      "00:22:39 [INFO] train episode 115: winner = -1, steps = 7\n",
      "00:22:41 [INFO] train episode 116: winner = -1, steps = 7\n",
      "00:22:41 [INFO] train episode 117: winner = -1, steps = 5\n",
      "00:22:41 [INFO] train episode 118: winner = 0, steps = 8\n",
      "00:22:43 [INFO] train episode 119: winner = -1, steps = 5\n",
      "00:22:44 [INFO] train episode 120: winner = 0, steps = 8\n",
      "00:22:46 [INFO] train episode 121: winner = 1, steps = 8\n",
      "00:22:46 [INFO] train episode 122: winner = 1, steps = 4\n",
      "00:22:46 [INFO] train episode 123: winner = -1, steps = 5\n",
      "00:22:46 [INFO] train episode 124: winner = 1, steps = 4\n",
      "00:22:46 [INFO] train episode 125: winner = 0, steps = 8\n",
      "00:22:46 [INFO] train episode 126: winner = 1, steps = 4\n",
      "00:22:47 [INFO] train episode 127: winner = 1, steps = 6\n",
      "00:22:47 [INFO] train episode 128: winner = 1, steps = 4\n",
      "00:22:49 [INFO] train episode 129: winner = 0, steps = 8\n",
      "00:22:51 [INFO] train episode 130: winner = 1, steps = 6\n",
      "00:22:52 [INFO] train episode 131: winner = 0, steps = 8\n",
      "00:22:55 [INFO] train episode 132: winner = 1, steps = 6\n",
      "00:22:55 [INFO] train episode 133: winner = -1, steps = 7\n",
      "00:22:56 [INFO] train episode 134: winner = 1, steps = 6\n",
      "00:23:02 [INFO] train episode 135: winner = 0, steps = 8\n",
      "00:23:02 [INFO] train episode 136: winner = 1, steps = 4\n",
      "00:23:03 [INFO] train episode 137: winner = 0, steps = 8\n",
      "00:23:03 [INFO] train episode 138: winner = 1, steps = 4\n",
      "00:23:06 [INFO] train episode 139: winner = -1, steps = 5\n",
      "00:23:11 [INFO] train episode 140: winner = 0, steps = 8\n",
      "00:23:11 [INFO] test episode 140:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:23:35 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "00:23:56 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "x+o\n",
      "+++\n",
      "00:24:15 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "x+o\n",
      "+++\n",
      "00:24:18 [INFO] step 3：player -1, action (2, 2)\n",
      "++o\n",
      "x+o\n",
      "++x\n",
      "00:24:19 [INFO] step 4：player 1, action (0, 0)\n",
      "o+o\n",
      "x+o\n",
      "++x\n",
      "00:24:19 [INFO] step 5：player -1, action (0, 1)\n",
      "oxo\n",
      "x+o\n",
      "++x\n",
      "00:24:19 [INFO] step 6：player 1, action (2, 0)\n",
      "oxo\n",
      "x+o\n",
      "o+x\n",
      "00:24:20 [INFO] step 7：player -1, action (1, 1)\n",
      "oxo\n",
      "xxo\n",
      "o+x\n",
      "00:24:20 [INFO] step 8：player 1, action (2, 1)\n",
      "oxo\n",
      "xxo\n",
      "oox\n",
      "00:24:20 [INFO] test episode 140: winner = 0, steps = 8\n",
      "00:24:49 [INFO] train episode 141: winner = 1, steps = 6\n",
      "00:25:28 [INFO] train episode 142: winner = -1, steps = 7\n",
      "00:26:20 [INFO] train episode 143: winner = 1, steps = 8\n",
      "00:26:42 [INFO] train episode 144: winner = 0, steps = 8\n",
      "00:27:02 [INFO] train episode 145: winner = 0, steps = 8\n",
      "00:27:41 [INFO] train episode 146: winner = 1, steps = 4\n",
      "00:28:14 [INFO] train episode 147: winner = 0, steps = 8\n",
      "00:28:36 [INFO] train episode 148: winner = 1, steps = 6\n",
      "00:28:45 [INFO] train episode 149: winner = 1, steps = 4\n",
      "00:28:50 [INFO] train episode 150: winner = 1, steps = 8\n",
      "00:28:51 [INFO] train episode 151: winner = 1, steps = 6\n",
      "00:29:15 [INFO] train episode 152: winner = 1, steps = 6\n",
      "00:29:22 [INFO] train episode 153: winner = 1, steps = 4\n",
      "00:29:30 [INFO] train episode 154: winner = 1, steps = 4\n",
      "00:29:33 [INFO] train episode 155: winner = 1, steps = 4\n",
      "00:30:02 [INFO] train episode 156: winner = -1, steps = 5\n",
      "00:30:11 [INFO] train episode 157: winner = 0, steps = 8\n",
      "00:30:19 [INFO] train episode 158: winner = 1, steps = 4\n",
      "00:30:24 [INFO] train episode 159: winner = -1, steps = 7\n",
      "00:30:24 [INFO] train episode 160: winner = 1, steps = 8\n",
      "00:30:27 [INFO] train episode 161: winner = 0, steps = 8\n",
      "00:30:41 [INFO] train episode 162: winner = 1, steps = 4\n",
      "00:30:42 [INFO] train episode 163: winner = 0, steps = 8\n",
      "00:30:50 [INFO] train episode 164: winner = 1, steps = 4\n",
      "00:31:23 [INFO] train episode 165: winner = 0, steps = 8\n",
      "00:31:28 [INFO] train episode 166: winner = 1, steps = 4\n",
      "00:31:40 [INFO] train episode 167: winner = 1, steps = 4\n",
      "00:31:50 [INFO] train episode 168: winner = 0, steps = 8\n",
      "00:31:56 [INFO] train episode 169: winner = 1, steps = 8\n",
      "00:31:57 [INFO] train episode 170: winner = 1, steps = 4\n",
      "00:32:04 [INFO] train episode 171: winner = 1, steps = 6\n",
      "00:32:06 [INFO] train episode 172: winner = 1, steps = 4\n",
      "00:32:09 [INFO] train episode 173: winner = 0, steps = 8\n",
      "00:32:11 [INFO] train episode 174: winner = 0, steps = 8\n",
      "00:32:20 [INFO] train episode 175: winner = 1, steps = 6\n",
      "00:32:22 [INFO] train episode 176: winner = 1, steps = 6\n",
      "00:32:30 [INFO] train episode 177: winner = 1, steps = 6\n",
      "00:32:32 [INFO] train episode 178: winner = 0, steps = 8\n",
      "00:32:35 [INFO] train episode 179: winner = 0, steps = 8\n",
      "00:32:37 [INFO] train episode 180: winner = 0, steps = 8\n",
      "00:32:39 [INFO] train episode 181: winner = 1, steps = 6\n",
      "00:32:47 [INFO] train episode 182: winner = 0, steps = 8\n",
      "00:32:48 [INFO] train episode 183: winner = -1, steps = 5\n",
      "00:32:49 [INFO] train episode 184: winner = 1, steps = 6\n",
      "00:32:52 [INFO] train episode 185: winner = 1, steps = 4\n",
      "00:33:00 [INFO] train episode 186: winner = 1, steps = 8\n",
      "00:33:02 [INFO] train episode 187: winner = 1, steps = 6\n",
      "00:33:11 [INFO] train episode 188: winner = -1, steps = 7\n",
      "00:33:14 [INFO] train episode 189: winner = 1, steps = 6\n",
      "00:33:15 [INFO] train episode 190: winner = 1, steps = 6\n",
      "00:33:19 [INFO] train episode 191: winner = 1, steps = 8\n",
      "00:33:19 [INFO] train episode 192: winner = -1, steps = 7\n",
      "00:33:20 [INFO] train episode 193: winner = 1, steps = 4\n",
      "00:33:29 [INFO] train episode 194: winner = 1, steps = 4\n",
      "00:33:34 [INFO] train episode 195: winner = 1, steps = 6\n",
      "00:33:37 [INFO] train episode 196: winner = 1, steps = 6\n",
      "00:33:43 [INFO] train episode 197: winner = 0, steps = 8\n",
      "00:33:46 [INFO] train episode 198: winner = -1, steps = 7\n",
      "00:33:49 [INFO] train episode 199: winner = 1, steps = 6\n",
      "00:33:49 [INFO] train episode 200: winner = 1, steps = 6\n",
      "00:33:56 [INFO] train episode 201: winner = 0, steps = 8\n",
      "00:33:56 [INFO] train episode 202: winner = 1, steps = 6\n",
      "00:34:14 [INFO] train episode 203: winner = 0, steps = 8\n",
      "00:34:16 [INFO] train episode 204: winner = 0, steps = 8\n",
      "00:34:16 [INFO] train episode 205: winner = 1, steps = 6\n",
      "00:34:17 [INFO] train episode 206: winner = 1, steps = 4\n",
      "00:34:18 [INFO] train episode 207: winner = -1, steps = 5\n",
      "00:34:22 [INFO] train episode 208: winner = 1, steps = 6\n",
      "00:34:23 [INFO] train episode 209: winner = 1, steps = 6\n",
      "00:34:28 [INFO] train episode 210: winner = 0, steps = 8\n",
      "00:34:30 [INFO] train episode 211: winner = 1, steps = 6\n",
      "00:34:35 [INFO] train episode 212: winner = 0, steps = 8\n",
      "00:34:36 [INFO] train episode 213: winner = 1, steps = 4\n",
      "00:34:37 [INFO] train episode 214: winner = -1, steps = 7\n",
      "00:34:39 [INFO] train episode 215: winner = 1, steps = 6\n",
      "00:34:39 [INFO] train episode 216: winner = 1, steps = 4\n",
      "00:34:41 [INFO] train episode 217: winner = 0, steps = 8\n",
      "00:34:42 [INFO] train episode 218: winner = 1, steps = 4\n",
      "00:34:43 [INFO] train episode 219: winner = 1, steps = 6\n",
      "00:34:46 [INFO] train episode 220: winner = 1, steps = 4\n",
      "00:34:46 [INFO] train episode 221: winner = 1, steps = 6\n",
      "00:34:48 [INFO] train episode 222: winner = 1, steps = 8\n",
      "00:34:50 [INFO] train episode 223: winner = 1, steps = 6\n",
      "00:34:54 [INFO] train episode 224: winner = 1, steps = 6\n",
      "00:35:01 [INFO] train episode 225: winner = 1, steps = 6\n",
      "00:35:03 [INFO] train episode 226: winner = 1, steps = 6\n",
      "00:35:03 [INFO] train episode 227: winner = 1, steps = 4\n",
      "00:35:03 [INFO] train episode 228: winner = 1, steps = 4\n",
      "00:35:04 [INFO] train episode 229: winner = 1, steps = 6\n",
      "00:35:04 [INFO] train episode 230: winner = 1, steps = 6\n",
      "00:35:05 [INFO] train episode 231: winner = 1, steps = 6\n",
      "00:35:06 [INFO] train episode 232: winner = -1, steps = 5\n",
      "00:35:06 [INFO] train episode 233: winner = 1, steps = 6\n",
      "00:35:08 [INFO] train episode 234: winner = 1, steps = 6\n",
      "00:35:14 [INFO] train episode 235: winner = -1, steps = 5\n",
      "00:35:17 [INFO] train episode 236: winner = 0, steps = 8\n",
      "00:35:19 [INFO] train episode 237: winner = 0, steps = 8\n",
      "00:35:19 [INFO] train episode 238: winner = 1, steps = 4\n",
      "00:35:19 [INFO] train episode 239: winner = 0, steps = 8\n",
      "00:35:21 [INFO] train episode 240: winner = 0, steps = 8\n",
      "00:35:23 [INFO] train episode 241: winner = -1, steps = 7\n",
      "00:35:25 [INFO] train episode 242: winner = -1, steps = 7\n",
      "00:35:26 [INFO] train episode 243: winner = 1, steps = 6\n",
      "00:35:26 [INFO] train episode 244: winner = -1, steps = 7\n",
      "00:35:27 [INFO] train episode 245: winner = -1, steps = 7\n",
      "00:35:27 [INFO] train episode 246: winner = 1, steps = 4\n",
      "00:35:30 [INFO] train episode 247: winner = 1, steps = 6\n",
      "00:35:32 [INFO] train episode 248: winner = 1, steps = 6\n",
      "00:35:33 [INFO] train episode 249: winner = 0, steps = 8\n",
      "00:35:36 [INFO] train episode 250: winner = 1, steps = 6\n",
      "00:35:37 [INFO] train episode 251: winner = -1, steps = 5\n",
      "00:35:38 [INFO] train episode 252: winner = 1, steps = 6\n",
      "00:35:42 [INFO] train episode 253: winner = -1, steps = 5\n",
      "00:35:42 [INFO] train episode 254: winner = 1, steps = 4\n",
      "00:35:42 [INFO] train episode 255: winner = 1, steps = 6\n",
      "00:35:44 [INFO] train episode 256: winner = 0, steps = 8\n",
      "00:35:45 [INFO] train episode 257: winner = -1, steps = 5\n",
      "00:35:45 [INFO] train episode 258: winner = 1, steps = 4\n",
      "00:35:45 [INFO] train episode 259: winner = 1, steps = 6\n",
      "00:35:46 [INFO] train episode 260: winner = 1, steps = 6\n",
      "00:35:46 [INFO] train episode 261: winner = 1, steps = 4\n",
      "00:35:46 [INFO] train episode 262: winner = 1, steps = 4\n",
      "00:35:47 [INFO] train episode 263: winner = 1, steps = 6\n",
      "00:35:49 [INFO] train episode 264: winner = 1, steps = 6\n",
      "00:35:50 [INFO] train episode 265: winner = -1, steps = 7\n",
      "00:35:50 [INFO] train episode 266: winner = 1, steps = 6\n",
      "00:35:52 [INFO] train episode 267: winner = 1, steps = 8\n",
      "00:35:52 [INFO] train episode 268: winner = 1, steps = 6\n",
      "00:35:53 [INFO] train episode 269: winner = 0, steps = 8\n",
      "00:35:53 [INFO] train episode 270: winner = 1, steps = 4\n",
      "00:35:56 [INFO] train episode 271: winner = 0, steps = 8\n",
      "00:35:57 [INFO] train episode 272: winner = -1, steps = 5\n",
      "00:35:58 [INFO] train episode 273: winner = -1, steps = 5\n",
      "00:35:58 [INFO] train episode 274: winner = 0, steps = 8\n",
      "00:36:00 [INFO] train episode 275: winner = 0, steps = 8\n",
      "00:36:02 [INFO] train episode 276: winner = 1, steps = 6\n",
      "00:36:04 [INFO] train episode 277: winner = -1, steps = 5\n",
      "00:36:05 [INFO] train episode 278: winner = -1, steps = 7\n",
      "00:36:06 [INFO] train episode 279: winner = 1, steps = 6\n",
      "00:36:06 [INFO] train episode 280: winner = 1, steps = 4\n",
      "00:36:08 [INFO] train episode 281: winner = -1, steps = 7\n",
      "00:36:08 [INFO] test episode 281:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:36:31 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "00:36:51 [INFO] step 1：player -1, action (2, 1)\n",
      "+++\n",
      "++o\n",
      "+x+\n",
      "00:37:08 [INFO] step 2：player 1, action (2, 2)\n",
      "+++\n",
      "++o\n",
      "+xo\n",
      "00:37:13 [INFO] step 3：player -1, action (0, 0)\n",
      "x++\n",
      "++o\n",
      "+xo\n",
      "00:37:15 [INFO] step 4：player 1, action (2, 0)\n",
      "x++\n",
      "++o\n",
      "oxo\n",
      "00:37:17 [INFO] step 5：player -1, action (0, 2)\n",
      "x+x\n",
      "++o\n",
      "oxo\n",
      "00:37:17 [INFO] step 6：player 1, action (0, 1)\n",
      "xox\n",
      "++o\n",
      "oxo\n",
      "00:37:17 [INFO] step 7：player -1, action (1, 1)\n",
      "xox\n",
      "+xo\n",
      "oxo\n",
      "00:37:17 [INFO] step 8：player 1, action (1, 0)\n",
      "xox\n",
      "oxo\n",
      "oxo\n",
      "00:37:17 [INFO] test episode 281: winner = 0, steps = 8\n",
      "00:37:59 [INFO] train episode 282: winner = 1, steps = 4\n",
      "00:38:18 [INFO] train episode 283: winner = -1, steps = 7\n",
      "00:39:03 [INFO] train episode 284: winner = 1, steps = 6\n",
      "00:39:39 [INFO] train episode 285: winner = 1, steps = 6\n",
      "00:39:56 [INFO] train episode 286: winner = -1, steps = 7\n",
      "00:40:13 [INFO] train episode 287: winner = 1, steps = 4\n",
      "00:41:02 [INFO] train episode 288: winner = 0, steps = 8\n",
      "00:41:15 [INFO] train episode 289: winner = -1, steps = 7\n",
      "00:41:49 [INFO] train episode 290: winner = 0, steps = 8\n",
      "00:42:03 [INFO] train episode 291: winner = 0, steps = 8\n",
      "00:42:28 [INFO] train episode 292: winner = 1, steps = 4\n",
      "00:42:40 [INFO] train episode 293: winner = -1, steps = 7\n",
      "00:42:41 [INFO] train episode 294: winner = 1, steps = 4\n",
      "00:42:49 [INFO] train episode 295: winner = -1, steps = 5\n",
      "00:43:07 [INFO] train episode 296: winner = 1, steps = 6\n",
      "00:43:15 [INFO] train episode 297: winner = 1, steps = 4\n",
      "00:43:16 [INFO] train episode 298: winner = -1, steps = 5\n",
      "00:43:29 [INFO] train episode 299: winner = 0, steps = 8\n",
      "00:43:37 [INFO] train episode 300: winner = 1, steps = 6\n",
      "00:43:40 [INFO] train episode 301: winner = 1, steps = 4\n",
      "00:43:42 [INFO] train episode 302: winner = 0, steps = 8\n",
      "00:43:45 [INFO] train episode 303: winner = 1, steps = 8\n",
      "00:43:51 [INFO] train episode 304: winner = 1, steps = 8\n",
      "00:44:09 [INFO] train episode 305: winner = 1, steps = 6\n",
      "00:44:25 [INFO] train episode 306: winner = 0, steps = 8\n",
      "00:44:34 [INFO] train episode 307: winner = 1, steps = 4\n",
      "00:44:39 [INFO] train episode 308: winner = 0, steps = 8\n",
      "00:44:41 [INFO] train episode 309: winner = 1, steps = 4\n",
      "00:44:46 [INFO] train episode 310: winner = 0, steps = 8\n",
      "00:44:55 [INFO] train episode 311: winner = 1, steps = 6\n",
      "00:45:00 [INFO] train episode 312: winner = 0, steps = 8\n",
      "00:45:07 [INFO] train episode 313: winner = 0, steps = 8\n",
      "00:45:10 [INFO] train episode 314: winner = 1, steps = 4\n",
      "00:45:10 [INFO] train episode 315: winner = 1, steps = 4\n",
      "00:45:12 [INFO] train episode 316: winner = -1, steps = 7\n",
      "00:45:14 [INFO] train episode 317: winner = -1, steps = 7\n",
      "00:45:16 [INFO] train episode 318: winner = 1, steps = 4\n",
      "00:45:23 [INFO] train episode 319: winner = 0, steps = 8\n",
      "00:45:27 [INFO] train episode 320: winner = 1, steps = 6\n",
      "00:45:30 [INFO] train episode 321: winner = 1, steps = 4\n",
      "00:45:33 [INFO] train episode 322: winner = 1, steps = 4\n",
      "00:45:33 [INFO] train episode 323: winner = 1, steps = 4\n",
      "00:45:35 [INFO] train episode 324: winner = 0, steps = 8\n",
      "00:45:38 [INFO] train episode 325: winner = -1, steps = 7\n",
      "00:45:49 [INFO] train episode 326: winner = 1, steps = 4\n",
      "00:45:51 [INFO] train episode 327: winner = 0, steps = 8\n",
      "00:45:58 [INFO] train episode 328: winner = 1, steps = 8\n",
      "00:46:04 [INFO] train episode 329: winner = 1, steps = 4\n",
      "00:46:07 [INFO] train episode 330: winner = 1, steps = 6\n",
      "00:46:07 [INFO] train episode 331: winner = 1, steps = 4\n",
      "00:46:11 [INFO] train episode 332: winner = 0, steps = 8\n",
      "00:46:12 [INFO] train episode 333: winner = 1, steps = 6\n",
      "00:46:19 [INFO] train episode 334: winner = -1, steps = 5\n",
      "00:46:19 [INFO] train episode 335: winner = 1, steps = 6\n",
      "00:46:20 [INFO] train episode 336: winner = 1, steps = 4\n",
      "00:46:25 [INFO] train episode 337: winner = 0, steps = 8\n",
      "00:46:25 [INFO] train episode 338: winner = -1, steps = 7\n",
      "00:46:28 [INFO] train episode 339: winner = 0, steps = 8\n",
      "00:46:31 [INFO] train episode 340: winner = -1, steps = 7\n",
      "00:46:32 [INFO] train episode 341: winner = -1, steps = 7\n",
      "00:46:34 [INFO] train episode 342: winner = 1, steps = 6\n",
      "00:46:34 [INFO] train episode 343: winner = 0, steps = 8\n",
      "00:46:39 [INFO] train episode 344: winner = 1, steps = 4\n",
      "00:46:40 [INFO] train episode 345: winner = -1, steps = 5\n",
      "00:46:47 [INFO] train episode 346: winner = 0, steps = 8\n",
      "00:46:47 [INFO] train episode 347: winner = 1, steps = 4\n",
      "00:46:49 [INFO] train episode 348: winner = -1, steps = 7\n",
      "00:46:55 [INFO] train episode 349: winner = 1, steps = 4\n",
      "00:46:58 [INFO] train episode 350: winner = 0, steps = 8\n",
      "00:46:59 [INFO] train episode 351: winner = 1, steps = 4\n",
      "00:47:04 [INFO] train episode 352: winner = 1, steps = 6\n",
      "00:47:06 [INFO] train episode 353: winner = 1, steps = 6\n",
      "00:47:06 [INFO] train episode 354: winner = 1, steps = 4\n",
      "00:47:06 [INFO] train episode 355: winner = 1, steps = 4\n",
      "00:47:06 [INFO] train episode 356: winner = 1, steps = 4\n",
      "00:47:06 [INFO] train episode 357: winner = 1, steps = 4\n",
      "00:47:09 [INFO] train episode 358: winner = 1, steps = 6\n",
      "00:47:10 [INFO] train episode 359: winner = 1, steps = 6\n",
      "00:47:11 [INFO] train episode 360: winner = 1, steps = 8\n",
      "00:47:17 [INFO] train episode 361: winner = 0, steps = 8\n",
      "00:47:23 [INFO] train episode 362: winner = -1, steps = 7\n",
      "00:47:30 [INFO] train episode 363: winner = 0, steps = 8\n",
      "00:47:31 [INFO] train episode 364: winner = 1, steps = 4\n",
      "00:47:34 [INFO] train episode 365: winner = 0, steps = 8\n",
      "00:47:36 [INFO] train episode 366: winner = 1, steps = 8\n",
      "00:47:36 [INFO] train episode 367: winner = 1, steps = 6\n",
      "00:47:39 [INFO] train episode 368: winner = 0, steps = 8\n",
      "00:47:42 [INFO] train episode 369: winner = 1, steps = 8\n",
      "00:47:44 [INFO] train episode 370: winner = 1, steps = 6\n",
      "00:47:48 [INFO] train episode 371: winner = 1, steps = 8\n",
      "00:47:50 [INFO] train episode 372: winner = 1, steps = 6\n",
      "00:47:52 [INFO] train episode 373: winner = 0, steps = 8\n",
      "00:47:52 [INFO] train episode 374: winner = 1, steps = 4\n",
      "00:47:57 [INFO] train episode 375: winner = 0, steps = 8\n",
      "00:47:58 [INFO] train episode 376: winner = 1, steps = 4\n",
      "00:48:02 [INFO] train episode 377: winner = 1, steps = 4\n",
      "00:48:05 [INFO] train episode 378: winner = 0, steps = 8\n",
      "00:48:08 [INFO] train episode 379: winner = 0, steps = 8\n",
      "00:48:09 [INFO] train episode 380: winner = 1, steps = 6\n",
      "00:48:10 [INFO] train episode 381: winner = 1, steps = 6\n",
      "00:48:10 [INFO] train episode 382: winner = 1, steps = 6\n",
      "00:48:12 [INFO] train episode 383: winner = 1, steps = 8\n",
      "00:48:14 [INFO] train episode 384: winner = -1, steps = 7\n",
      "00:48:17 [INFO] train episode 385: winner = 1, steps = 4\n",
      "00:48:18 [INFO] train episode 386: winner = 1, steps = 4\n",
      "00:48:18 [INFO] train episode 387: winner = 1, steps = 6\n",
      "00:48:22 [INFO] train episode 388: winner = 0, steps = 8\n",
      "00:48:22 [INFO] train episode 389: winner = 1, steps = 6\n",
      "00:48:22 [INFO] train episode 390: winner = 1, steps = 4\n",
      "00:48:26 [INFO] train episode 391: winner = 0, steps = 8\n",
      "00:48:29 [INFO] train episode 392: winner = 0, steps = 8\n",
      "00:48:31 [INFO] train episode 393: winner = 0, steps = 8\n",
      "00:48:32 [INFO] train episode 394: winner = 1, steps = 6\n",
      "00:48:34 [INFO] train episode 395: winner = -1, steps = 7\n",
      "00:48:34 [INFO] train episode 396: winner = 1, steps = 6\n",
      "00:48:37 [INFO] train episode 397: winner = 0, steps = 8\n",
      "00:48:37 [INFO] train episode 398: winner = 1, steps = 4\n",
      "00:48:38 [INFO] train episode 399: winner = 0, steps = 8\n",
      "00:48:40 [INFO] train episode 400: winner = 1, steps = 6\n",
      "00:48:40 [INFO] train episode 401: winner = 1, steps = 6\n",
      "00:48:40 [INFO] train episode 402: winner = 1, steps = 4\n",
      "00:48:41 [INFO] train episode 403: winner = 0, steps = 8\n",
      "00:48:41 [INFO] train episode 404: winner = 1, steps = 6\n",
      "00:48:42 [INFO] train episode 405: winner = 1, steps = 6\n",
      "00:48:44 [INFO] train episode 406: winner = 1, steps = 6\n",
      "00:48:44 [INFO] train episode 407: winner = -1, steps = 5\n",
      "00:48:45 [INFO] train episode 408: winner = -1, steps = 7\n",
      "00:48:46 [INFO] train episode 409: winner = 1, steps = 6\n",
      "00:48:47 [INFO] train episode 410: winner = 0, steps = 8\n",
      "00:48:48 [INFO] train episode 411: winner = 1, steps = 6\n",
      "00:48:50 [INFO] train episode 412: winner = 1, steps = 4\n",
      "00:48:50 [INFO] train episode 413: winner = 0, steps = 8\n",
      "00:48:50 [INFO] train episode 414: winner = 1, steps = 6\n",
      "00:48:50 [INFO] train episode 415: winner = 1, steps = 4\n",
      "00:48:51 [INFO] train episode 416: winner = 0, steps = 8\n",
      "00:48:52 [INFO] train episode 417: winner = 1, steps = 6\n",
      "00:48:52 [INFO] train episode 418: winner = 1, steps = 8\n",
      "00:48:53 [INFO] train episode 419: winner = -1, steps = 5\n",
      "00:48:55 [INFO] train episode 420: winner = -1, steps = 7\n",
      "00:48:55 [INFO] test episode 420:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "00:49:23 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "00:49:45 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "++o\n",
      "+++\n",
      "00:50:04 [INFO] step 2：player 1, action (1, 0)\n",
      "++x\n",
      "o+o\n",
      "+++\n",
      "00:50:05 [INFO] step 3：player -1, action (1, 1)\n",
      "++x\n",
      "oxo\n",
      "+++\n",
      "00:50:05 [INFO] step 4：player 1, action (0, 1)\n",
      "+ox\n",
      "oxo\n",
      "+++\n",
      "00:50:07 [INFO] step 5：player -1, action (2, 0)\n",
      "+ox\n",
      "oxo\n",
      "x++\n",
      "00:50:07 [INFO] test episode 420: winner = -1, steps = 5\n",
      "00:50:30 [INFO] train episode 421: winner = -1, steps = 5\n",
      "00:51:14 [INFO] train episode 422: winner = 0, steps = 8\n",
      "00:51:59 [INFO] train episode 423: winner = 1, steps = 8\n",
      "00:52:17 [INFO] train episode 424: winner = -1, steps = 5\n",
      "00:53:00 [INFO] train episode 425: winner = 0, steps = 8\n",
      "00:53:38 [INFO] train episode 426: winner = 1, steps = 6\n",
      "00:53:51 [INFO] train episode 427: winner = 0, steps = 8\n",
      "00:54:08 [INFO] train episode 428: winner = 1, steps = 8\n",
      "00:54:08 [INFO] train episode 429: winner = -1, steps = 5\n",
      "00:54:29 [INFO] train episode 430: winner = 1, steps = 4\n",
      "00:55:00 [INFO] train episode 431: winner = 1, steps = 6\n",
      "00:55:06 [INFO] train episode 432: winner = 1, steps = 4\n",
      "00:55:32 [INFO] train episode 433: winner = -1, steps = 7\n",
      "00:55:45 [INFO] train episode 434: winner = 1, steps = 6\n",
      "00:55:52 [INFO] train episode 435: winner = 1, steps = 4\n",
      "00:56:03 [INFO] train episode 436: winner = 1, steps = 8\n",
      "00:56:15 [INFO] train episode 437: winner = 0, steps = 8\n",
      "00:56:23 [INFO] train episode 438: winner = 1, steps = 4\n",
      "00:56:29 [INFO] train episode 439: winner = 0, steps = 8\n",
      "00:56:33 [INFO] train episode 440: winner = -1, steps = 7\n",
      "00:56:35 [INFO] train episode 441: winner = 0, steps = 8\n",
      "00:56:44 [INFO] train episode 442: winner = 1, steps = 4\n",
      "00:56:45 [INFO] train episode 443: winner = -1, steps = 5\n",
      "00:56:45 [INFO] train episode 444: winner = 1, steps = 4\n",
      "00:56:57 [INFO] train episode 445: winner = 1, steps = 6\n",
      "00:57:00 [INFO] train episode 446: winner = 1, steps = 4\n",
      "00:57:14 [INFO] train episode 447: winner = 1, steps = 6\n",
      "00:57:20 [INFO] train episode 448: winner = -1, steps = 7\n",
      "00:57:24 [INFO] train episode 449: winner = 1, steps = 6\n",
      "00:57:25 [INFO] train episode 450: winner = 1, steps = 6\n",
      "00:57:26 [INFO] train episode 451: winner = -1, steps = 7\n",
      "00:57:31 [INFO] train episode 452: winner = 0, steps = 8\n",
      "00:57:34 [INFO] train episode 453: winner = 1, steps = 6\n",
      "00:57:35 [INFO] train episode 454: winner = 1, steps = 6\n",
      "00:57:39 [INFO] train episode 455: winner = 1, steps = 4\n",
      "00:57:44 [INFO] train episode 456: winner = 1, steps = 4\n",
      "00:57:48 [INFO] train episode 457: winner = -1, steps = 7\n",
      "00:57:49 [INFO] train episode 458: winner = 1, steps = 6\n",
      "00:57:50 [INFO] train episode 459: winner = -1, steps = 7\n",
      "00:57:50 [INFO] train episode 460: winner = 1, steps = 6\n",
      "00:57:56 [INFO] train episode 461: winner = 1, steps = 6\n",
      "00:58:05 [INFO] train episode 462: winner = -1, steps = 7\n",
      "00:58:07 [INFO] train episode 463: winner = 1, steps = 4\n",
      "00:58:14 [INFO] train episode 464: winner = 0, steps = 8\n",
      "00:58:20 [INFO] train episode 465: winner = 0, steps = 8\n",
      "00:58:20 [INFO] train episode 466: winner = 1, steps = 4\n",
      "00:58:26 [INFO] train episode 467: winner = 0, steps = 8\n",
      "00:58:34 [INFO] train episode 468: winner = 1, steps = 6\n",
      "00:58:39 [INFO] train episode 469: winner = 0, steps = 8\n",
      "00:58:41 [INFO] train episode 470: winner = 1, steps = 4\n",
      "00:58:42 [INFO] train episode 471: winner = 1, steps = 6\n",
      "00:58:51 [INFO] train episode 472: winner = -1, steps = 7\n",
      "00:58:52 [INFO] train episode 473: winner = 1, steps = 6\n",
      "00:58:54 [INFO] train episode 474: winner = 1, steps = 6\n",
      "00:58:58 [INFO] train episode 475: winner = 1, steps = 6\n",
      "00:59:00 [INFO] train episode 476: winner = 0, steps = 8\n",
      "00:59:02 [INFO] train episode 477: winner = 1, steps = 4\n",
      "00:59:10 [INFO] train episode 478: winner = 0, steps = 8\n",
      "00:59:13 [INFO] train episode 479: winner = 1, steps = 6\n",
      "00:59:13 [INFO] train episode 480: winner = -1, steps = 7\n",
      "00:59:15 [INFO] train episode 481: winner = -1, steps = 7\n",
      "00:59:16 [INFO] train episode 482: winner = 0, steps = 8\n",
      "00:59:17 [INFO] train episode 483: winner = 1, steps = 8\n",
      "00:59:19 [INFO] train episode 484: winner = 1, steps = 8\n",
      "00:59:19 [INFO] train episode 485: winner = 1, steps = 6\n",
      "00:59:22 [INFO] train episode 486: winner = 1, steps = 6\n",
      "00:59:25 [INFO] train episode 487: winner = 1, steps = 6\n",
      "00:59:31 [INFO] train episode 488: winner = 0, steps = 8\n",
      "00:59:32 [INFO] train episode 489: winner = -1, steps = 7\n",
      "00:59:35 [INFO] train episode 490: winner = 0, steps = 8\n",
      "00:59:36 [INFO] train episode 491: winner = 1, steps = 6\n",
      "00:59:38 [INFO] train episode 492: winner = 1, steps = 4\n",
      "00:59:39 [INFO] train episode 493: winner = 1, steps = 6\n",
      "00:59:41 [INFO] train episode 494: winner = 1, steps = 4\n",
      "00:59:44 [INFO] train episode 495: winner = 1, steps = 6\n",
      "00:59:48 [INFO] train episode 496: winner = 1, steps = 6\n",
      "00:59:49 [INFO] train episode 497: winner = 0, steps = 8\n",
      "00:59:53 [INFO] train episode 498: winner = 0, steps = 8\n",
      "00:59:53 [INFO] train episode 499: winner = -1, steps = 7\n",
      "00:59:53 [INFO] train episode 500: winner = 1, steps = 4\n",
      "00:59:58 [INFO] train episode 501: winner = -1, steps = 7\n",
      "00:59:59 [INFO] train episode 502: winner = 1, steps = 4\n",
      "01:00:00 [INFO] train episode 503: winner = 1, steps = 6\n",
      "01:00:01 [INFO] train episode 504: winner = -1, steps = 5\n",
      "01:00:06 [INFO] train episode 505: winner = 0, steps = 8\n",
      "01:00:06 [INFO] train episode 506: winner = -1, steps = 7\n",
      "01:00:08 [INFO] train episode 507: winner = -1, steps = 7\n",
      "01:00:09 [INFO] train episode 508: winner = 1, steps = 6\n",
      "01:00:14 [INFO] train episode 509: winner = -1, steps = 5\n",
      "01:00:16 [INFO] train episode 510: winner = 0, steps = 8\n",
      "01:00:18 [INFO] train episode 511: winner = 1, steps = 6\n",
      "01:00:21 [INFO] train episode 512: winner = -1, steps = 5\n",
      "01:00:23 [INFO] train episode 513: winner = 1, steps = 6\n",
      "01:00:23 [INFO] train episode 514: winner = -1, steps = 7\n",
      "01:00:24 [INFO] train episode 515: winner = 1, steps = 6\n",
      "01:00:25 [INFO] train episode 516: winner = 1, steps = 6\n",
      "01:00:32 [INFO] train episode 517: winner = 1, steps = 6\n",
      "01:00:32 [INFO] train episode 518: winner = 1, steps = 6\n",
      "01:00:32 [INFO] train episode 519: winner = 1, steps = 6\n",
      "01:00:33 [INFO] train episode 520: winner = 1, steps = 4\n",
      "01:00:33 [INFO] train episode 521: winner = 1, steps = 6\n",
      "01:00:34 [INFO] train episode 522: winner = 1, steps = 4\n",
      "01:00:36 [INFO] train episode 523: winner = 1, steps = 6\n",
      "01:00:38 [INFO] train episode 524: winner = 1, steps = 4\n",
      "01:00:38 [INFO] train episode 525: winner = 1, steps = 6\n",
      "01:00:39 [INFO] train episode 526: winner = 1, steps = 6\n",
      "01:00:43 [INFO] train episode 527: winner = 0, steps = 8\n",
      "01:00:44 [INFO] train episode 528: winner = 1, steps = 4\n",
      "01:00:46 [INFO] train episode 529: winner = -1, steps = 5\n",
      "01:00:48 [INFO] train episode 530: winner = 0, steps = 8\n",
      "01:00:49 [INFO] train episode 531: winner = 1, steps = 4\n",
      "01:00:50 [INFO] train episode 532: winner = 0, steps = 8\n",
      "01:00:52 [INFO] train episode 533: winner = -1, steps = 5\n",
      "01:00:53 [INFO] train episode 534: winner = 0, steps = 8\n",
      "01:00:54 [INFO] train episode 535: winner = 1, steps = 6\n",
      "01:00:54 [INFO] train episode 536: winner = 1, steps = 6\n",
      "01:00:55 [INFO] train episode 537: winner = 1, steps = 4\n",
      "01:00:56 [INFO] train episode 538: winner = 0, steps = 8\n",
      "01:00:56 [INFO] train episode 539: winner = 1, steps = 6\n",
      "01:00:56 [INFO] train episode 540: winner = 1, steps = 6\n",
      "01:00:58 [INFO] train episode 541: winner = 0, steps = 8\n",
      "01:00:59 [INFO] train episode 542: winner = 1, steps = 6\n",
      "01:01:00 [INFO] train episode 543: winner = 0, steps = 8\n",
      "01:01:02 [INFO] train episode 544: winner = 0, steps = 8\n",
      "01:01:02 [INFO] train episode 545: winner = -1, steps = 5\n",
      "01:01:02 [INFO] train episode 546: winner = -1, steps = 5\n",
      "01:01:04 [INFO] train episode 547: winner = 1, steps = 6\n",
      "01:01:07 [INFO] train episode 548: winner = -1, steps = 5\n",
      "01:01:08 [INFO] train episode 549: winner = 0, steps = 8\n",
      "01:01:08 [INFO] train episode 550: winner = 1, steps = 6\n",
      "01:01:10 [INFO] train episode 551: winner = 1, steps = 8\n",
      "01:01:12 [INFO] train episode 552: winner = 1, steps = 6\n",
      "01:01:14 [INFO] train episode 553: winner = -1, steps = 7\n",
      "01:01:18 [INFO] train episode 554: winner = 0, steps = 8\n",
      "01:01:18 [INFO] train episode 555: winner = 1, steps = 4\n",
      "01:01:18 [INFO] train episode 556: winner = 1, steps = 6\n",
      "01:01:18 [INFO] train episode 557: winner = -1, steps = 7\n",
      "01:01:19 [INFO] train episode 558: winner = -1, steps = 5\n",
      "01:01:21 [INFO] train episode 559: winner = 1, steps = 6\n",
      "01:01:21 [INFO] test episode 559:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:01:47 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "01:02:14 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+o+\n",
      "+++\n",
      "01:02:30 [INFO] step 2：player 1, action (2, 2)\n",
      "+x+\n",
      "+o+\n",
      "++o\n",
      "01:02:32 [INFO] step 3：player -1, action (1, 0)\n",
      "+x+\n",
      "xo+\n",
      "++o\n",
      "01:02:34 [INFO] step 4：player 1, action (0, 0)\n",
      "ox+\n",
      "xo+\n",
      "++o\n",
      "01:02:34 [INFO] test episode 559: winner = 1, steps = 4\n",
      "01:03:13 [INFO] train episode 560: winner = 1, steps = 4\n",
      "01:03:37 [INFO] train episode 561: winner = 1, steps = 4\n",
      "01:04:12 [INFO] train episode 562: winner = 1, steps = 4\n",
      "01:04:58 [INFO] train episode 563: winner = 0, steps = 8\n",
      "01:05:14 [INFO] train episode 564: winner = 0, steps = 8\n",
      "01:05:38 [INFO] train episode 565: winner = 1, steps = 4\n",
      "01:05:41 [INFO] train episode 566: winner = 1, steps = 6\n",
      "01:06:04 [INFO] train episode 567: winner = 1, steps = 6\n",
      "01:06:20 [INFO] train episode 568: winner = 0, steps = 8\n",
      "01:06:33 [INFO] train episode 569: winner = -1, steps = 5\n",
      "01:06:44 [INFO] train episode 570: winner = 0, steps = 8\n",
      "01:06:45 [INFO] train episode 571: winner = 1, steps = 6\n",
      "01:06:57 [INFO] train episode 572: winner = 0, steps = 8\n",
      "01:07:01 [INFO] train episode 573: winner = 1, steps = 6\n",
      "01:07:33 [INFO] train episode 574: winner = 0, steps = 8\n",
      "01:08:04 [INFO] train episode 575: winner = 1, steps = 4\n",
      "01:08:05 [INFO] train episode 576: winner = 1, steps = 6\n",
      "01:08:07 [INFO] train episode 577: winner = 1, steps = 4\n",
      "01:08:09 [INFO] train episode 578: winner = 1, steps = 6\n",
      "01:08:15 [INFO] train episode 579: winner = 0, steps = 8\n",
      "01:08:26 [INFO] train episode 580: winner = 1, steps = 4\n",
      "01:08:29 [INFO] train episode 581: winner = 1, steps = 4\n",
      "01:08:37 [INFO] train episode 582: winner = 1, steps = 6\n",
      "01:08:44 [INFO] train episode 583: winner = 1, steps = 4\n",
      "01:08:54 [INFO] train episode 584: winner = 1, steps = 6\n",
      "01:08:59 [INFO] train episode 585: winner = -1, steps = 7\n",
      "01:09:00 [INFO] train episode 586: winner = 1, steps = 6\n",
      "01:09:00 [INFO] train episode 587: winner = 1, steps = 4\n",
      "01:09:07 [INFO] train episode 588: winner = -1, steps = 5\n",
      "01:09:10 [INFO] train episode 589: winner = 1, steps = 6\n",
      "01:09:12 [INFO] train episode 590: winner = 1, steps = 6\n",
      "01:09:16 [INFO] train episode 591: winner = 0, steps = 8\n",
      "01:09:19 [INFO] train episode 592: winner = 1, steps = 6\n",
      "01:09:19 [INFO] train episode 593: winner = 0, steps = 8\n",
      "01:09:27 [INFO] train episode 594: winner = 1, steps = 6\n",
      "01:09:30 [INFO] train episode 595: winner = 1, steps = 4\n",
      "01:09:36 [INFO] train episode 596: winner = 1, steps = 6\n",
      "01:09:42 [INFO] train episode 597: winner = -1, steps = 5\n",
      "01:09:46 [INFO] train episode 598: winner = 1, steps = 8\n",
      "01:09:50 [INFO] train episode 599: winner = 0, steps = 8\n",
      "01:09:58 [INFO] train episode 600: winner = 0, steps = 8\n",
      "01:10:01 [INFO] train episode 601: winner = -1, steps = 5\n",
      "01:10:01 [INFO] train episode 602: winner = 1, steps = 6\n",
      "01:10:01 [INFO] train episode 603: winner = 1, steps = 6\n",
      "01:10:05 [INFO] train episode 604: winner = 1, steps = 6\n",
      "01:10:06 [INFO] train episode 605: winner = 1, steps = 6\n",
      "01:10:12 [INFO] train episode 606: winner = 1, steps = 6\n",
      "01:10:17 [INFO] train episode 607: winner = 1, steps = 6\n",
      "01:10:21 [INFO] train episode 608: winner = 1, steps = 4\n",
      "01:10:28 [INFO] train episode 609: winner = 0, steps = 8\n",
      "01:10:34 [INFO] train episode 610: winner = 0, steps = 8\n",
      "01:10:38 [INFO] train episode 611: winner = -1, steps = 7\n",
      "01:10:41 [INFO] train episode 612: winner = 1, steps = 6\n",
      "01:10:41 [INFO] train episode 613: winner = -1, steps = 7\n",
      "01:10:43 [INFO] train episode 614: winner = -1, steps = 5\n",
      "01:10:45 [INFO] train episode 615: winner = -1, steps = 7\n",
      "01:10:47 [INFO] train episode 616: winner = 0, steps = 8\n",
      "01:10:48 [INFO] train episode 617: winner = -1, steps = 5\n",
      "01:10:49 [INFO] train episode 618: winner = 1, steps = 6\n",
      "01:10:52 [INFO] train episode 619: winner = 0, steps = 8\n",
      "01:10:54 [INFO] train episode 620: winner = 1, steps = 8\n",
      "01:10:57 [INFO] train episode 621: winner = 0, steps = 8\n",
      "01:10:59 [INFO] train episode 622: winner = 1, steps = 6\n",
      "01:11:06 [INFO] train episode 623: winner = 0, steps = 8\n",
      "01:11:09 [INFO] train episode 624: winner = 0, steps = 8\n",
      "01:11:10 [INFO] train episode 625: winner = 1, steps = 4\n",
      "01:11:20 [INFO] train episode 626: winner = 1, steps = 6\n",
      "01:11:21 [INFO] train episode 627: winner = -1, steps = 5\n",
      "01:11:31 [INFO] train episode 628: winner = -1, steps = 7\n",
      "01:11:32 [INFO] train episode 629: winner = 1, steps = 6\n",
      "01:11:33 [INFO] train episode 630: winner = 1, steps = 6\n",
      "01:11:34 [INFO] train episode 631: winner = 1, steps = 6\n",
      "01:11:35 [INFO] train episode 632: winner = 1, steps = 8\n",
      "01:11:37 [INFO] train episode 633: winner = 1, steps = 4\n",
      "01:11:38 [INFO] train episode 634: winner = -1, steps = 7\n",
      "01:11:42 [INFO] train episode 635: winner = 0, steps = 8\n",
      "01:11:42 [INFO] train episode 636: winner = -1, steps = 5\n",
      "01:11:47 [INFO] train episode 637: winner = 1, steps = 6\n",
      "01:11:50 [INFO] train episode 638: winner = 1, steps = 4\n",
      "01:11:51 [INFO] train episode 639: winner = -1, steps = 5\n",
      "01:11:51 [INFO] train episode 640: winner = 0, steps = 8\n",
      "01:11:51 [INFO] train episode 641: winner = -1, steps = 7\n",
      "01:11:52 [INFO] train episode 642: winner = 0, steps = 8\n",
      "01:11:53 [INFO] train episode 643: winner = 1, steps = 8\n",
      "01:11:54 [INFO] train episode 644: winner = 0, steps = 8\n",
      "01:11:55 [INFO] train episode 645: winner = 1, steps = 4\n",
      "01:11:55 [INFO] train episode 646: winner = 1, steps = 6\n",
      "01:11:58 [INFO] train episode 647: winner = -1, steps = 7\n",
      "01:12:06 [INFO] train episode 648: winner = 1, steps = 6\n",
      "01:12:06 [INFO] train episode 649: winner = -1, steps = 5\n",
      "01:12:06 [INFO] train episode 650: winner = 1, steps = 6\n",
      "01:12:11 [INFO] train episode 651: winner = 0, steps = 8\n",
      "01:12:13 [INFO] train episode 652: winner = 1, steps = 6\n",
      "01:12:15 [INFO] train episode 653: winner = 0, steps = 8\n",
      "01:12:15 [INFO] train episode 654: winner = 1, steps = 4\n",
      "01:12:19 [INFO] train episode 655: winner = -1, steps = 7\n",
      "01:12:21 [INFO] train episode 656: winner = 1, steps = 8\n",
      "01:12:23 [INFO] train episode 657: winner = -1, steps = 7\n",
      "01:12:24 [INFO] train episode 658: winner = 1, steps = 6\n",
      "01:12:26 [INFO] train episode 659: winner = 1, steps = 4\n",
      "01:12:26 [INFO] train episode 660: winner = 1, steps = 4\n",
      "01:12:26 [INFO] train episode 661: winner = 0, steps = 8\n",
      "01:12:29 [INFO] train episode 662: winner = 1, steps = 4\n",
      "01:12:30 [INFO] train episode 663: winner = 1, steps = 4\n",
      "01:12:30 [INFO] train episode 664: winner = 1, steps = 6\n",
      "01:12:31 [INFO] train episode 665: winner = 0, steps = 8\n",
      "01:12:31 [INFO] train episode 666: winner = 1, steps = 6\n",
      "01:12:37 [INFO] train episode 667: winner = 0, steps = 8\n",
      "01:12:38 [INFO] train episode 668: winner = 0, steps = 8\n",
      "01:12:44 [INFO] train episode 669: winner = 1, steps = 6\n",
      "01:12:44 [INFO] train episode 670: winner = 1, steps = 4\n",
      "01:12:46 [INFO] train episode 671: winner = 1, steps = 8\n",
      "01:12:46 [INFO] train episode 672: winner = -1, steps = 7\n",
      "01:12:46 [INFO] train episode 673: winner = 1, steps = 6\n",
      "01:12:49 [INFO] train episode 674: winner = 1, steps = 6\n",
      "01:12:50 [INFO] train episode 675: winner = 1, steps = 6\n",
      "01:12:50 [INFO] train episode 676: winner = 0, steps = 8\n",
      "01:12:50 [INFO] train episode 677: winner = 1, steps = 6\n",
      "01:12:51 [INFO] train episode 678: winner = 1, steps = 6\n",
      "01:12:53 [INFO] train episode 679: winner = 1, steps = 6\n",
      "01:12:54 [INFO] train episode 680: winner = -1, steps = 5\n",
      "01:12:55 [INFO] train episode 681: winner = 1, steps = 6\n",
      "01:12:56 [INFO] train episode 682: winner = 1, steps = 4\n",
      "01:12:56 [INFO] train episode 683: winner = 1, steps = 6\n",
      "01:12:57 [INFO] train episode 684: winner = -1, steps = 7\n",
      "01:13:01 [INFO] train episode 685: winner = 0, steps = 8\n",
      "01:13:03 [INFO] train episode 686: winner = 1, steps = 6\n",
      "01:13:03 [INFO] train episode 687: winner = -1, steps = 5\n",
      "01:13:04 [INFO] train episode 688: winner = 0, steps = 8\n",
      "01:13:04 [INFO] train episode 689: winner = -1, steps = 7\n",
      "01:13:07 [INFO] train episode 690: winner = 0, steps = 8\n",
      "01:13:07 [INFO] train episode 691: winner = -1, steps = 7\n",
      "01:13:15 [INFO] train episode 692: winner = 0, steps = 8\n",
      "01:13:19 [INFO] train episode 693: winner = 0, steps = 8\n",
      "01:13:19 [INFO] train episode 694: winner = 1, steps = 6\n",
      "01:13:23 [INFO] train episode 695: winner = 1, steps = 6\n",
      "01:13:24 [INFO] train episode 696: winner = 1, steps = 6\n",
      "01:13:25 [INFO] train episode 697: winner = -1, steps = 7\n",
      "01:13:25 [INFO] test episode 697:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:13:50 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "01:14:11 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "++o\n",
      "+++\n",
      "01:14:29 [INFO] step 2：player 1, action (2, 0)\n",
      "++x\n",
      "++o\n",
      "o++\n",
      "01:14:41 [INFO] step 3：player -1, action (2, 1)\n",
      "++x\n",
      "++o\n",
      "ox+\n",
      "01:14:43 [INFO] step 4：player 1, action (1, 0)\n",
      "++x\n",
      "o+o\n",
      "ox+\n",
      "01:14:43 [INFO] step 5：player -1, action (0, 1)\n",
      "+xx\n",
      "o+o\n",
      "ox+\n",
      "01:14:43 [INFO] step 6：player 1, action (0, 0)\n",
      "oxx\n",
      "o+o\n",
      "ox+\n",
      "01:14:43 [INFO] test episode 697: winner = 1, steps = 6\n",
      "01:15:25 [INFO] train episode 698: winner = 1, steps = 6\n",
      "01:15:55 [INFO] train episode 699: winner = 1, steps = 6\n",
      "01:16:24 [INFO] train episode 700: winner = 1, steps = 6\n",
      "01:16:57 [INFO] train episode 701: winner = 1, steps = 6\n",
      "01:17:19 [INFO] train episode 702: winner = 1, steps = 6\n",
      "01:17:22 [INFO] train episode 703: winner = -1, steps = 5\n",
      "01:17:58 [INFO] train episode 704: winner = 0, steps = 8\n",
      "01:18:06 [INFO] train episode 705: winner = 1, steps = 6\n",
      "01:18:12 [INFO] train episode 706: winner = 0, steps = 8\n",
      "01:18:23 [INFO] train episode 707: winner = -1, steps = 7\n",
      "01:18:51 [INFO] train episode 708: winner = 1, steps = 4\n",
      "01:19:16 [INFO] train episode 709: winner = 1, steps = 6\n",
      "01:19:29 [INFO] train episode 710: winner = 1, steps = 6\n",
      "01:19:32 [INFO] train episode 711: winner = 1, steps = 6\n",
      "01:19:34 [INFO] train episode 712: winner = 1, steps = 4\n",
      "01:19:49 [INFO] train episode 713: winner = 0, steps = 8\n",
      "01:20:10 [INFO] train episode 714: winner = 1, steps = 6\n",
      "01:20:19 [INFO] train episode 715: winner = 0, steps = 8\n",
      "01:20:36 [INFO] train episode 716: winner = 1, steps = 6\n",
      "01:20:40 [INFO] train episode 717: winner = 1, steps = 8\n",
      "01:20:45 [INFO] train episode 718: winner = 1, steps = 4\n",
      "01:20:51 [INFO] train episode 719: winner = 1, steps = 6\n",
      "01:20:53 [INFO] train episode 720: winner = 1, steps = 6\n",
      "01:20:57 [INFO] train episode 721: winner = 0, steps = 8\n",
      "01:21:02 [INFO] train episode 722: winner = 1, steps = 6\n",
      "01:21:04 [INFO] train episode 723: winner = 1, steps = 6\n",
      "01:21:11 [INFO] train episode 724: winner = 1, steps = 6\n",
      "01:21:21 [INFO] train episode 725: winner = 1, steps = 4\n",
      "01:21:26 [INFO] train episode 726: winner = 1, steps = 4\n",
      "01:21:33 [INFO] train episode 727: winner = 1, steps = 6\n",
      "01:21:35 [INFO] train episode 728: winner = 0, steps = 8\n",
      "01:21:38 [INFO] train episode 729: winner = 1, steps = 6\n",
      "01:21:39 [INFO] train episode 730: winner = 0, steps = 8\n",
      "01:21:43 [INFO] train episode 731: winner = 0, steps = 8\n",
      "01:21:59 [INFO] train episode 732: winner = 1, steps = 8\n",
      "01:22:00 [INFO] train episode 733: winner = 1, steps = 6\n",
      "01:22:02 [INFO] train episode 734: winner = 0, steps = 8\n",
      "01:22:04 [INFO] train episode 735: winner = -1, steps = 7\n",
      "01:22:09 [INFO] train episode 736: winner = -1, steps = 7\n",
      "01:22:09 [INFO] train episode 737: winner = -1, steps = 5\n",
      "01:22:17 [INFO] train episode 738: winner = 1, steps = 4\n",
      "01:22:21 [INFO] train episode 739: winner = 1, steps = 4\n",
      "01:22:22 [INFO] train episode 740: winner = 1, steps = 6\n",
      "01:22:24 [INFO] train episode 741: winner = 0, steps = 8\n",
      "01:22:27 [INFO] train episode 742: winner = 1, steps = 4\n",
      "01:22:28 [INFO] train episode 743: winner = 1, steps = 6\n",
      "01:22:41 [INFO] train episode 744: winner = 0, steps = 8\n",
      "01:22:41 [INFO] train episode 745: winner = -1, steps = 7\n",
      "01:22:45 [INFO] train episode 746: winner = 1, steps = 4\n",
      "01:22:47 [INFO] train episode 747: winner = 0, steps = 8\n",
      "01:22:51 [INFO] train episode 748: winner = 1, steps = 8\n",
      "01:22:54 [INFO] train episode 749: winner = 1, steps = 4\n",
      "01:22:55 [INFO] train episode 750: winner = 1, steps = 4\n",
      "01:22:57 [INFO] train episode 751: winner = 0, steps = 8\n",
      "01:23:02 [INFO] train episode 752: winner = 0, steps = 8\n",
      "01:23:04 [INFO] train episode 753: winner = 1, steps = 6\n",
      "01:23:06 [INFO] train episode 754: winner = -1, steps = 5\n",
      "01:23:08 [INFO] train episode 755: winner = 1, steps = 6\n",
      "01:23:09 [INFO] train episode 756: winner = 1, steps = 4\n",
      "01:23:14 [INFO] train episode 757: winner = 0, steps = 8\n",
      "01:23:17 [INFO] train episode 758: winner = 1, steps = 6\n",
      "01:23:18 [INFO] train episode 759: winner = -1, steps = 7\n",
      "01:23:19 [INFO] train episode 760: winner = 1, steps = 6\n",
      "01:23:21 [INFO] train episode 761: winner = 1, steps = 4\n",
      "01:23:22 [INFO] train episode 762: winner = 1, steps = 6\n",
      "01:23:22 [INFO] train episode 763: winner = 1, steps = 4\n",
      "01:23:22 [INFO] train episode 764: winner = -1, steps = 7\n",
      "01:23:25 [INFO] train episode 765: winner = -1, steps = 5\n",
      "01:23:26 [INFO] train episode 766: winner = 1, steps = 8\n",
      "01:23:29 [INFO] train episode 767: winner = 1, steps = 4\n",
      "01:23:30 [INFO] train episode 768: winner = 1, steps = 4\n",
      "01:23:31 [INFO] train episode 769: winner = 1, steps = 6\n",
      "01:23:32 [INFO] train episode 770: winner = -1, steps = 7\n",
      "01:23:32 [INFO] train episode 771: winner = 1, steps = 6\n",
      "01:23:34 [INFO] train episode 772: winner = 1, steps = 4\n",
      "01:23:40 [INFO] train episode 773: winner = 1, steps = 4\n",
      "01:23:41 [INFO] train episode 774: winner = 1, steps = 6\n",
      "01:23:45 [INFO] train episode 775: winner = 1, steps = 6\n",
      "01:23:46 [INFO] train episode 776: winner = 1, steps = 6\n",
      "01:23:50 [INFO] train episode 777: winner = 1, steps = 8\n",
      "01:23:51 [INFO] train episode 778: winner = 1, steps = 6\n",
      "01:23:51 [INFO] train episode 779: winner = 1, steps = 4\n",
      "01:23:53 [INFO] train episode 780: winner = 1, steps = 6\n",
      "01:24:00 [INFO] train episode 781: winner = 0, steps = 8\n",
      "01:24:03 [INFO] train episode 782: winner = 1, steps = 4\n",
      "01:24:04 [INFO] train episode 783: winner = 1, steps = 6\n",
      "01:24:05 [INFO] train episode 784: winner = 1, steps = 4\n",
      "01:24:08 [INFO] train episode 785: winner = 0, steps = 8\n",
      "01:24:11 [INFO] train episode 786: winner = 1, steps = 4\n",
      "01:24:12 [INFO] train episode 787: winner = 1, steps = 6\n",
      "01:24:20 [INFO] train episode 788: winner = 0, steps = 8\n",
      "01:24:21 [INFO] train episode 789: winner = 1, steps = 4\n",
      "01:24:21 [INFO] train episode 790: winner = 1, steps = 4\n",
      "01:24:26 [INFO] train episode 791: winner = 0, steps = 8\n",
      "01:24:27 [INFO] train episode 792: winner = 1, steps = 6\n",
      "01:24:27 [INFO] train episode 793: winner = 0, steps = 8\n",
      "01:24:30 [INFO] train episode 794: winner = 1, steps = 6\n",
      "01:24:33 [INFO] train episode 795: winner = 0, steps = 8\n",
      "01:24:41 [INFO] train episode 796: winner = -1, steps = 7\n",
      "01:24:43 [INFO] train episode 797: winner = 0, steps = 8\n",
      "01:24:45 [INFO] train episode 798: winner = 1, steps = 4\n",
      "01:24:46 [INFO] train episode 799: winner = 1, steps = 6\n",
      "01:24:48 [INFO] train episode 800: winner = 0, steps = 8\n",
      "01:24:49 [INFO] train episode 801: winner = 1, steps = 4\n",
      "01:24:49 [INFO] train episode 802: winner = 1, steps = 4\n",
      "01:24:49 [INFO] train episode 803: winner = 1, steps = 6\n",
      "01:24:50 [INFO] train episode 804: winner = 1, steps = 6\n",
      "01:24:51 [INFO] train episode 805: winner = 0, steps = 8\n",
      "01:24:52 [INFO] train episode 806: winner = 0, steps = 8\n",
      "01:24:55 [INFO] train episode 807: winner = 0, steps = 8\n",
      "01:24:56 [INFO] train episode 808: winner = -1, steps = 5\n",
      "01:24:56 [INFO] train episode 809: winner = 0, steps = 8\n",
      "01:24:56 [INFO] train episode 810: winner = 1, steps = 6\n",
      "01:24:58 [INFO] train episode 811: winner = 1, steps = 6\n",
      "01:24:59 [INFO] train episode 812: winner = 1, steps = 6\n",
      "01:25:00 [INFO] train episode 813: winner = 1, steps = 6\n",
      "01:25:02 [INFO] train episode 814: winner = 1, steps = 6\n",
      "01:25:03 [INFO] train episode 815: winner = 0, steps = 8\n",
      "01:25:06 [INFO] train episode 816: winner = 1, steps = 8\n",
      "01:25:07 [INFO] train episode 817: winner = 1, steps = 6\n",
      "01:25:09 [INFO] train episode 818: winner = -1, steps = 5\n",
      "01:25:10 [INFO] train episode 819: winner = 1, steps = 4\n",
      "01:25:14 [INFO] train episode 820: winner = -1, steps = 7\n",
      "01:25:15 [INFO] train episode 821: winner = 0, steps = 8\n",
      "01:25:16 [INFO] train episode 822: winner = 1, steps = 6\n",
      "01:25:16 [INFO] train episode 823: winner = 1, steps = 6\n",
      "01:25:16 [INFO] train episode 824: winner = 0, steps = 8\n",
      "01:25:19 [INFO] train episode 825: winner = 1, steps = 6\n",
      "01:25:20 [INFO] train episode 826: winner = 1, steps = 4\n",
      "01:25:21 [INFO] train episode 827: winner = 1, steps = 6\n",
      "01:25:22 [INFO] train episode 828: winner = 1, steps = 6\n",
      "01:25:23 [INFO] train episode 829: winner = -1, steps = 7\n",
      "01:25:25 [INFO] train episode 830: winner = 0, steps = 8\n",
      "01:25:27 [INFO] train episode 831: winner = 1, steps = 6\n",
      "01:25:27 [INFO] train episode 832: winner = 1, steps = 4\n",
      "01:25:31 [INFO] train episode 833: winner = 0, steps = 8\n",
      "01:25:31 [INFO] train episode 834: winner = 1, steps = 6\n",
      "01:25:31 [INFO] train episode 835: winner = 1, steps = 4\n",
      "01:25:32 [INFO] train episode 836: winner = 1, steps = 4\n",
      "01:25:34 [INFO] train episode 837: winner = 1, steps = 4\n",
      "01:25:37 [INFO] train episode 838: winner = 1, steps = 6\n",
      "01:25:37 [INFO] test episode 838:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:26:04 [INFO] step 0：player 1, action (0, 2)\n",
      "++o\n",
      "+++\n",
      "+++\n",
      "01:26:26 [INFO] step 1：player -1, action (0, 0)\n",
      "x+o\n",
      "+++\n",
      "+++\n",
      "01:26:38 [INFO] step 2：player 1, action (2, 2)\n",
      "x+o\n",
      "+++\n",
      "++o\n",
      "01:26:38 [INFO] step 3：player -1, action (2, 0)\n",
      "x+o\n",
      "+++\n",
      "x+o\n",
      "01:26:39 [INFO] step 4：player 1, action (1, 2)\n",
      "x+o\n",
      "++o\n",
      "x+o\n",
      "01:26:39 [INFO] test episode 838: winner = 1, steps = 4\n",
      "01:26:57 [INFO] train episode 839: winner = 1, steps = 6\n",
      "01:27:27 [INFO] train episode 840: winner = 0, steps = 8\n",
      "01:28:06 [INFO] train episode 841: winner = 1, steps = 8\n",
      "01:28:42 [INFO] train episode 842: winner = 1, steps = 8\n",
      "01:29:35 [INFO] train episode 843: winner = 0, steps = 8\n",
      "01:29:51 [INFO] train episode 844: winner = 0, steps = 8\n",
      "01:29:59 [INFO] train episode 845: winner = 1, steps = 6\n",
      "01:30:08 [INFO] train episode 846: winner = 1, steps = 4\n",
      "01:30:41 [INFO] train episode 847: winner = 1, steps = 4\n",
      "01:30:43 [INFO] train episode 848: winner = 1, steps = 4\n",
      "01:30:49 [INFO] train episode 849: winner = 1, steps = 4\n",
      "01:31:14 [INFO] train episode 850: winner = 0, steps = 8\n",
      "01:31:25 [INFO] train episode 851: winner = 1, steps = 6\n",
      "01:31:29 [INFO] train episode 852: winner = 1, steps = 6\n",
      "01:31:31 [INFO] train episode 853: winner = 1, steps = 6\n",
      "01:31:50 [INFO] train episode 854: winner = 1, steps = 6\n",
      "01:32:15 [INFO] train episode 855: winner = 0, steps = 8\n",
      "01:32:21 [INFO] train episode 856: winner = 1, steps = 6\n",
      "01:32:28 [INFO] train episode 857: winner = 1, steps = 4\n",
      "01:32:30 [INFO] train episode 858: winner = 0, steps = 8\n",
      "01:32:38 [INFO] train episode 859: winner = 0, steps = 8\n",
      "01:32:45 [INFO] train episode 860: winner = -1, steps = 5\n",
      "01:32:48 [INFO] train episode 861: winner = 1, steps = 6\n",
      "01:32:48 [INFO] train episode 862: winner = 1, steps = 4\n",
      "01:32:50 [INFO] train episode 863: winner = -1, steps = 7\n",
      "01:32:52 [INFO] train episode 864: winner = 1, steps = 6\n",
      "01:32:57 [INFO] train episode 865: winner = 0, steps = 8\n",
      "01:33:02 [INFO] train episode 866: winner = 0, steps = 8\n",
      "01:33:05 [INFO] train episode 867: winner = 1, steps = 6\n",
      "01:33:13 [INFO] train episode 868: winner = -1, steps = 7\n",
      "01:33:15 [INFO] train episode 869: winner = -1, steps = 5\n",
      "01:33:22 [INFO] train episode 870: winner = 1, steps = 4\n",
      "01:33:28 [INFO] train episode 871: winner = 1, steps = 8\n",
      "01:33:32 [INFO] train episode 872: winner = 1, steps = 4\n",
      "01:33:59 [INFO] train episode 873: winner = 0, steps = 8\n",
      "01:33:59 [INFO] train episode 874: winner = 1, steps = 4\n",
      "01:33:59 [INFO] train episode 875: winner = 1, steps = 6\n",
      "01:34:00 [INFO] train episode 876: winner = 1, steps = 6\n",
      "01:34:01 [INFO] train episode 877: winner = 0, steps = 8\n",
      "01:34:08 [INFO] train episode 878: winner = 0, steps = 8\n",
      "01:34:16 [INFO] train episode 879: winner = 0, steps = 8\n",
      "01:34:21 [INFO] train episode 880: winner = 1, steps = 4\n",
      "01:34:22 [INFO] train episode 881: winner = 1, steps = 4\n",
      "01:34:27 [INFO] train episode 882: winner = 1, steps = 6\n",
      "01:34:27 [INFO] train episode 883: winner = 1, steps = 4\n",
      "01:34:37 [INFO] train episode 884: winner = 0, steps = 8\n",
      "01:34:45 [INFO] train episode 885: winner = 0, steps = 8\n",
      "01:34:47 [INFO] train episode 886: winner = 1, steps = 4\n",
      "01:34:48 [INFO] train episode 887: winner = 1, steps = 6\n",
      "01:34:52 [INFO] train episode 888: winner = 1, steps = 8\n",
      "01:34:52 [INFO] train episode 889: winner = 1, steps = 6\n",
      "01:35:01 [INFO] train episode 890: winner = 0, steps = 8\n",
      "01:35:06 [INFO] train episode 891: winner = 1, steps = 6\n",
      "01:35:08 [INFO] train episode 892: winner = 1, steps = 4\n",
      "01:35:10 [INFO] train episode 893: winner = 0, steps = 8\n",
      "01:35:16 [INFO] train episode 894: winner = 1, steps = 4\n",
      "01:35:22 [INFO] train episode 895: winner = 0, steps = 8\n",
      "01:35:25 [INFO] train episode 896: winner = 0, steps = 8\n",
      "01:35:28 [INFO] train episode 897: winner = -1, steps = 5\n",
      "01:35:30 [INFO] train episode 898: winner = 1, steps = 6\n",
      "01:35:35 [INFO] train episode 899: winner = 0, steps = 8\n",
      "01:35:35 [INFO] train episode 900: winner = 1, steps = 4\n",
      "01:35:36 [INFO] train episode 901: winner = 1, steps = 8\n",
      "01:35:38 [INFO] train episode 902: winner = 1, steps = 4\n",
      "01:35:39 [INFO] train episode 903: winner = 1, steps = 6\n",
      "01:35:43 [INFO] train episode 904: winner = -1, steps = 5\n",
      "01:35:45 [INFO] train episode 905: winner = 1, steps = 6\n",
      "01:35:47 [INFO] train episode 906: winner = 1, steps = 4\n",
      "01:35:48 [INFO] train episode 907: winner = -1, steps = 5\n",
      "01:35:49 [INFO] train episode 908: winner = 1, steps = 6\n",
      "01:35:51 [INFO] train episode 909: winner = 0, steps = 8\n",
      "01:35:54 [INFO] train episode 910: winner = -1, steps = 5\n",
      "01:35:55 [INFO] train episode 911: winner = 1, steps = 4\n",
      "01:35:56 [INFO] train episode 912: winner = 1, steps = 8\n",
      "01:35:57 [INFO] train episode 913: winner = 0, steps = 8\n",
      "01:35:58 [INFO] train episode 914: winner = -1, steps = 7\n",
      "01:35:58 [INFO] train episode 915: winner = 0, steps = 8\n",
      "01:35:59 [INFO] train episode 916: winner = -1, steps = 5\n",
      "01:36:05 [INFO] train episode 917: winner = 0, steps = 8\n",
      "01:36:05 [INFO] train episode 918: winner = 1, steps = 4\n",
      "01:36:07 [INFO] train episode 919: winner = -1, steps = 7\n",
      "01:36:11 [INFO] train episode 920: winner = 0, steps = 8\n",
      "01:36:12 [INFO] train episode 921: winner = 0, steps = 8\n",
      "01:36:12 [INFO] train episode 922: winner = 1, steps = 6\n",
      "01:36:16 [INFO] train episode 923: winner = 1, steps = 6\n",
      "01:36:17 [INFO] train episode 924: winner = 0, steps = 8\n",
      "01:36:21 [INFO] train episode 925: winner = 0, steps = 8\n",
      "01:36:22 [INFO] train episode 926: winner = 1, steps = 4\n",
      "01:36:26 [INFO] train episode 927: winner = 0, steps = 8\n",
      "01:36:28 [INFO] train episode 928: winner = 1, steps = 4\n",
      "01:36:29 [INFO] train episode 929: winner = 1, steps = 4\n",
      "01:36:31 [INFO] train episode 930: winner = 0, steps = 8\n",
      "01:36:31 [INFO] train episode 931: winner = 1, steps = 4\n",
      "01:36:31 [INFO] train episode 932: winner = 1, steps = 8\n",
      "01:36:32 [INFO] train episode 933: winner = 0, steps = 8\n",
      "01:36:34 [INFO] train episode 934: winner = 1, steps = 6\n",
      "01:36:35 [INFO] train episode 935: winner = 1, steps = 6\n",
      "01:36:37 [INFO] train episode 936: winner = 0, steps = 8\n",
      "01:36:39 [INFO] train episode 937: winner = -1, steps = 7\n",
      "01:36:41 [INFO] train episode 938: winner = 0, steps = 8\n",
      "01:36:44 [INFO] train episode 939: winner = 1, steps = 4\n",
      "01:36:45 [INFO] train episode 940: winner = 1, steps = 4\n",
      "01:36:47 [INFO] train episode 941: winner = 1, steps = 6\n",
      "01:36:49 [INFO] train episode 942: winner = 1, steps = 4\n",
      "01:36:51 [INFO] train episode 943: winner = 1, steps = 6\n",
      "01:36:52 [INFO] train episode 944: winner = 1, steps = 6\n",
      "01:36:54 [INFO] train episode 945: winner = 1, steps = 6\n",
      "01:36:55 [INFO] train episode 946: winner = -1, steps = 5\n",
      "01:36:55 [INFO] train episode 947: winner = 0, steps = 8\n",
      "01:36:56 [INFO] train episode 948: winner = 0, steps = 8\n",
      "01:36:57 [INFO] train episode 949: winner = 1, steps = 6\n",
      "01:36:57 [INFO] train episode 950: winner = 1, steps = 6\n",
      "01:36:58 [INFO] train episode 951: winner = 0, steps = 8\n",
      "01:36:58 [INFO] train episode 952: winner = 1, steps = 6\n",
      "01:36:58 [INFO] train episode 953: winner = 1, steps = 6\n",
      "01:36:59 [INFO] train episode 954: winner = 1, steps = 6\n",
      "01:37:00 [INFO] train episode 955: winner = 0, steps = 8\n",
      "01:37:00 [INFO] train episode 956: winner = 1, steps = 4\n",
      "01:37:00 [INFO] train episode 957: winner = 0, steps = 8\n",
      "01:37:03 [INFO] train episode 958: winner = 1, steps = 4\n",
      "01:37:04 [INFO] train episode 959: winner = -1, steps = 5\n",
      "01:37:04 [INFO] train episode 960: winner = 0, steps = 8\n",
      "01:37:07 [INFO] train episode 961: winner = 1, steps = 6\n",
      "01:37:11 [INFO] train episode 962: winner = 0, steps = 8\n",
      "01:37:14 [INFO] train episode 963: winner = -1, steps = 7\n",
      "01:37:14 [INFO] train episode 964: winner = 1, steps = 6\n",
      "01:37:18 [INFO] train episode 965: winner = 1, steps = 6\n",
      "01:37:18 [INFO] train episode 966: winner = 1, steps = 6\n",
      "01:37:19 [INFO] train episode 967: winner = 0, steps = 8\n",
      "01:37:20 [INFO] train episode 968: winner = 1, steps = 6\n",
      "01:37:20 [INFO] train episode 969: winner = 1, steps = 4\n",
      "01:37:20 [INFO] train episode 970: winner = 0, steps = 8\n",
      "01:37:21 [INFO] train episode 971: winner = -1, steps = 5\n",
      "01:37:22 [INFO] train episode 972: winner = -1, steps = 5\n",
      "01:37:23 [INFO] train episode 973: winner = 1, steps = 6\n",
      "01:37:24 [INFO] train episode 974: winner = -1, steps = 5\n",
      "01:37:25 [INFO] train episode 975: winner = 1, steps = 8\n",
      "01:37:29 [INFO] train episode 976: winner = 1, steps = 6\n",
      "01:37:29 [INFO] test episode 976:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:37:54 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "01:38:22 [INFO] step 1：player -1, action (1, 2)\n",
      "+++\n",
      "++x\n",
      "+o+\n",
      "01:38:41 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "++x\n",
      "+o+\n",
      "01:38:46 [INFO] step 3：player -1, action (1, 1)\n",
      "+o+\n",
      "+xx\n",
      "+o+\n",
      "01:38:46 [INFO] step 4：player 1, action (2, 0)\n",
      "+o+\n",
      "+xx\n",
      "oo+\n",
      "01:38:46 [INFO] step 5：player -1, action (1, 0)\n",
      "+o+\n",
      "xxx\n",
      "oo+\n",
      "01:38:46 [INFO] test episode 976: winner = -1, steps = 5\n",
      "01:39:34 [INFO] train episode 977: winner = 1, steps = 6\n",
      "01:40:22 [INFO] train episode 978: winner = 0, steps = 8\n",
      "01:40:51 [INFO] train episode 979: winner = 1, steps = 6\n",
      "01:41:05 [INFO] train episode 980: winner = 0, steps = 8\n",
      "01:41:28 [INFO] train episode 981: winner = 1, steps = 6\n",
      "01:41:33 [INFO] train episode 982: winner = 0, steps = 8\n",
      "01:41:42 [INFO] train episode 983: winner = 1, steps = 6\n",
      "01:42:04 [INFO] train episode 984: winner = 1, steps = 8\n",
      "01:42:14 [INFO] train episode 985: winner = 1, steps = 4\n",
      "01:42:20 [INFO] train episode 986: winner = 1, steps = 6\n",
      "01:42:47 [INFO] train episode 987: winner = 1, steps = 8\n",
      "01:43:02 [INFO] train episode 988: winner = 0, steps = 8\n",
      "01:43:32 [INFO] train episode 989: winner = 0, steps = 8\n",
      "01:43:38 [INFO] train episode 990: winner = 1, steps = 4\n",
      "01:43:40 [INFO] train episode 991: winner = 1, steps = 6\n",
      "01:43:45 [INFO] train episode 992: winner = 1, steps = 4\n",
      "01:44:03 [INFO] train episode 993: winner = 1, steps = 6\n",
      "01:44:18 [INFO] train episode 994: winner = 1, steps = 4\n",
      "01:44:23 [INFO] train episode 995: winner = 1, steps = 4\n",
      "01:44:24 [INFO] train episode 996: winner = 1, steps = 6\n",
      "01:44:30 [INFO] train episode 997: winner = 1, steps = 4\n",
      "01:44:34 [INFO] train episode 998: winner = 1, steps = 6\n",
      "01:44:34 [INFO] train episode 999: winner = 1, steps = 6\n",
      "01:44:38 [INFO] train episode 1000: winner = 1, steps = 4\n",
      "01:44:58 [INFO] train episode 1001: winner = 0, steps = 8\n",
      "01:45:09 [INFO] train episode 1002: winner = 1, steps = 4\n",
      "01:45:10 [INFO] train episode 1003: winner = -1, steps = 5\n",
      "01:45:15 [INFO] train episode 1004: winner = 1, steps = 4\n",
      "01:45:17 [INFO] train episode 1005: winner = 1, steps = 4\n",
      "01:45:19 [INFO] train episode 1006: winner = 1, steps = 4\n",
      "01:45:23 [INFO] train episode 1007: winner = 1, steps = 6\n",
      "01:45:27 [INFO] train episode 1008: winner = 1, steps = 4\n",
      "01:45:31 [INFO] train episode 1009: winner = 1, steps = 4\n",
      "01:45:38 [INFO] train episode 1010: winner = 1, steps = 4\n",
      "01:45:40 [INFO] train episode 1011: winner = 1, steps = 4\n",
      "01:45:44 [INFO] train episode 1012: winner = 1, steps = 4\n",
      "01:45:46 [INFO] train episode 1013: winner = -1, steps = 7\n",
      "01:45:46 [INFO] train episode 1014: winner = 1, steps = 6\n",
      "01:45:47 [INFO] train episode 1015: winner = 0, steps = 8\n",
      "01:45:47 [INFO] train episode 1016: winner = 1, steps = 4\n",
      "01:45:56 [INFO] train episode 1017: winner = 0, steps = 8\n",
      "01:45:57 [INFO] train episode 1018: winner = 0, steps = 8\n",
      "01:45:59 [INFO] train episode 1019: winner = 1, steps = 6\n",
      "01:46:02 [INFO] train episode 1020: winner = 1, steps = 4\n",
      "01:46:02 [INFO] train episode 1021: winner = 1, steps = 4\n",
      "01:46:02 [INFO] train episode 1022: winner = 1, steps = 4\n",
      "01:46:10 [INFO] train episode 1023: winner = 1, steps = 6\n",
      "01:46:13 [INFO] train episode 1024: winner = 1, steps = 6\n",
      "01:46:13 [INFO] train episode 1025: winner = 1, steps = 6\n",
      "01:46:17 [INFO] train episode 1026: winner = 1, steps = 4\n",
      "01:46:35 [INFO] train episode 1027: winner = 1, steps = 6\n",
      "01:46:37 [INFO] train episode 1028: winner = 1, steps = 4\n",
      "01:46:42 [INFO] train episode 1029: winner = 0, steps = 8\n",
      "01:46:44 [INFO] train episode 1030: winner = 1, steps = 4\n",
      "01:46:58 [INFO] train episode 1031: winner = 0, steps = 8\n",
      "01:47:00 [INFO] train episode 1032: winner = 1, steps = 4\n",
      "01:47:01 [INFO] train episode 1033: winner = 0, steps = 8\n",
      "01:47:01 [INFO] train episode 1034: winner = -1, steps = 5\n",
      "01:47:05 [INFO] train episode 1035: winner = 1, steps = 4\n",
      "01:47:08 [INFO] train episode 1036: winner = 0, steps = 8\n",
      "01:47:09 [INFO] train episode 1037: winner = 1, steps = 6\n",
      "01:47:10 [INFO] train episode 1038: winner = 1, steps = 4\n",
      "01:47:13 [INFO] train episode 1039: winner = 0, steps = 8\n",
      "01:47:21 [INFO] train episode 1040: winner = 0, steps = 8\n",
      "01:47:22 [INFO] train episode 1041: winner = 1, steps = 4\n",
      "01:47:34 [INFO] train episode 1042: winner = 0, steps = 8\n",
      "01:47:35 [INFO] train episode 1043: winner = 1, steps = 6\n",
      "01:47:35 [INFO] train episode 1044: winner = 1, steps = 4\n",
      "01:47:36 [INFO] train episode 1045: winner = 1, steps = 6\n",
      "01:47:45 [INFO] train episode 1046: winner = 1, steps = 4\n",
      "01:47:47 [INFO] train episode 1047: winner = 1, steps = 6\n",
      "01:47:47 [INFO] train episode 1048: winner = 1, steps = 6\n",
      "01:47:48 [INFO] train episode 1049: winner = -1, steps = 5\n",
      "01:47:49 [INFO] train episode 1050: winner = 1, steps = 6\n",
      "01:47:50 [INFO] train episode 1051: winner = 0, steps = 8\n",
      "01:47:51 [INFO] train episode 1052: winner = 1, steps = 4\n",
      "01:47:51 [INFO] train episode 1053: winner = 1, steps = 4\n",
      "01:47:55 [INFO] train episode 1054: winner = 0, steps = 8\n",
      "01:47:57 [INFO] train episode 1055: winner = 1, steps = 6\n",
      "01:48:01 [INFO] train episode 1056: winner = -1, steps = 7\n",
      "01:48:08 [INFO] train episode 1057: winner = 1, steps = 6\n",
      "01:48:11 [INFO] train episode 1058: winner = 1, steps = 6\n",
      "01:48:12 [INFO] train episode 1059: winner = 0, steps = 8\n",
      "01:48:16 [INFO] train episode 1060: winner = -1, steps = 5\n",
      "01:48:18 [INFO] train episode 1061: winner = -1, steps = 7\n",
      "01:48:19 [INFO] train episode 1062: winner = -1, steps = 7\n",
      "01:48:21 [INFO] train episode 1063: winner = 1, steps = 6\n",
      "01:48:24 [INFO] train episode 1064: winner = 1, steps = 4\n",
      "01:48:25 [INFO] train episode 1065: winner = 0, steps = 8\n",
      "01:48:27 [INFO] train episode 1066: winner = -1, steps = 5\n",
      "01:48:28 [INFO] train episode 1067: winner = 1, steps = 4\n",
      "01:48:31 [INFO] train episode 1068: winner = 1, steps = 6\n",
      "01:48:35 [INFO] train episode 1069: winner = 1, steps = 6\n",
      "01:48:36 [INFO] train episode 1070: winner = 1, steps = 6\n",
      "01:48:37 [INFO] train episode 1071: winner = 1, steps = 6\n",
      "01:48:41 [INFO] train episode 1072: winner = 0, steps = 8\n",
      "01:48:47 [INFO] train episode 1073: winner = 0, steps = 8\n",
      "01:48:50 [INFO] train episode 1074: winner = 1, steps = 6\n",
      "01:48:52 [INFO] train episode 1075: winner = 1, steps = 4\n",
      "01:48:53 [INFO] train episode 1076: winner = 0, steps = 8\n",
      "01:48:54 [INFO] train episode 1077: winner = 1, steps = 4\n",
      "01:48:56 [INFO] train episode 1078: winner = -1, steps = 7\n",
      "01:49:01 [INFO] train episode 1079: winner = 0, steps = 8\n",
      "01:49:03 [INFO] train episode 1080: winner = 0, steps = 8\n",
      "01:49:05 [INFO] train episode 1081: winner = 1, steps = 4\n",
      "01:49:05 [INFO] train episode 1082: winner = 1, steps = 4\n",
      "01:49:06 [INFO] train episode 1083: winner = 1, steps = 6\n",
      "01:49:06 [INFO] train episode 1084: winner = 1, steps = 4\n",
      "01:49:06 [INFO] train episode 1085: winner = 1, steps = 4\n",
      "01:49:08 [INFO] train episode 1086: winner = 1, steps = 6\n",
      "01:49:10 [INFO] train episode 1087: winner = -1, steps = 5\n",
      "01:49:10 [INFO] train episode 1088: winner = 1, steps = 4\n",
      "01:49:11 [INFO] train episode 1089: winner = 0, steps = 8\n",
      "01:49:14 [INFO] train episode 1090: winner = 0, steps = 8\n",
      "01:49:16 [INFO] train episode 1091: winner = 1, steps = 6\n",
      "01:49:16 [INFO] train episode 1092: winner = 1, steps = 4\n",
      "01:49:16 [INFO] train episode 1093: winner = -1, steps = 7\n",
      "01:49:18 [INFO] train episode 1094: winner = -1, steps = 5\n",
      "01:49:21 [INFO] train episode 1095: winner = 1, steps = 4\n",
      "01:49:23 [INFO] train episode 1096: winner = 1, steps = 6\n",
      "01:49:23 [INFO] train episode 1097: winner = 1, steps = 4\n",
      "01:49:24 [INFO] train episode 1098: winner = 0, steps = 8\n",
      "01:49:24 [INFO] train episode 1099: winner = 1, steps = 4\n",
      "01:49:26 [INFO] train episode 1100: winner = 1, steps = 6\n",
      "01:49:28 [INFO] train episode 1101: winner = -1, steps = 5\n",
      "01:49:29 [INFO] train episode 1102: winner = 1, steps = 6\n",
      "01:49:30 [INFO] train episode 1103: winner = 1, steps = 8\n",
      "01:49:31 [INFO] train episode 1104: winner = -1, steps = 5\n",
      "01:49:31 [INFO] train episode 1105: winner = 1, steps = 4\n",
      "01:49:36 [INFO] train episode 1106: winner = -1, steps = 7\n",
      "01:49:44 [INFO] train episode 1107: winner = 0, steps = 8\n",
      "01:49:46 [INFO] train episode 1108: winner = 1, steps = 6\n",
      "01:49:50 [INFO] train episode 1109: winner = 1, steps = 6\n",
      "01:49:51 [INFO] train episode 1110: winner = 1, steps = 8\n",
      "01:49:51 [INFO] train episode 1111: winner = 1, steps = 6\n",
      "01:49:51 [INFO] train episode 1112: winner = 1, steps = 6\n",
      "01:49:53 [INFO] train episode 1113: winner = 1, steps = 6\n",
      "01:49:53 [INFO] train episode 1114: winner = 1, steps = 8\n",
      "01:49:54 [INFO] train episode 1115: winner = 1, steps = 4\n",
      "01:49:54 [INFO] train episode 1116: winner = 1, steps = 6\n",
      "01:49:55 [INFO] train episode 1117: winner = 0, steps = 8\n",
      "01:50:00 [INFO] train episode 1118: winner = 0, steps = 8\n",
      "01:50:01 [INFO] train episode 1119: winner = 1, steps = 4\n",
      "01:50:01 [INFO] train episode 1120: winner = 1, steps = 4\n",
      "01:50:03 [INFO] train episode 1121: winner = 1, steps = 6\n",
      "01:50:03 [INFO] train episode 1122: winner = 1, steps = 6\n",
      "01:50:07 [INFO] train episode 1123: winner = 0, steps = 8\n",
      "01:50:07 [INFO] test episode 1123:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "01:50:33 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "01:50:58 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+++\n",
      "++o\n",
      "01:51:17 [INFO] step 2：player 1, action (0, 2)\n",
      "+xo\n",
      "+++\n",
      "++o\n",
      "01:51:18 [INFO] step 3：player -1, action (1, 2)\n",
      "+xo\n",
      "++x\n",
      "++o\n",
      "01:51:19 [INFO] step 4：player 1, action (1, 1)\n",
      "+xo\n",
      "+ox\n",
      "++o\n",
      "01:51:20 [INFO] step 5：player -1, action (2, 0)\n",
      "+xo\n",
      "+ox\n",
      "x+o\n",
      "01:51:20 [INFO] step 6：player 1, action (0, 0)\n",
      "oxo\n",
      "+ox\n",
      "x+o\n",
      "01:51:20 [INFO] test episode 1123: winner = 1, steps = 6\n",
      "01:51:50 [INFO] train episode 1124: winner = 0, steps = 8\n",
      "01:52:27 [INFO] train episode 1125: winner = 1, steps = 4\n",
      "01:52:40 [INFO] train episode 1126: winner = 1, steps = 8\n",
      "01:53:15 [INFO] train episode 1127: winner = 1, steps = 6\n",
      "01:53:26 [INFO] train episode 1128: winner = 1, steps = 4\n",
      "01:53:29 [INFO] train episode 1129: winner = -1, steps = 5\n",
      "01:53:41 [INFO] train episode 1130: winner = 0, steps = 8\n",
      "01:54:04 [INFO] train episode 1131: winner = 0, steps = 8\n",
      "01:54:23 [INFO] train episode 1132: winner = 0, steps = 8\n",
      "01:54:29 [INFO] train episode 1133: winner = 1, steps = 6\n",
      "01:54:52 [INFO] train episode 1134: winner = 1, steps = 6\n",
      "01:55:19 [INFO] train episode 1135: winner = 1, steps = 6\n",
      "01:55:28 [INFO] train episode 1136: winner = 0, steps = 8\n",
      "01:55:45 [INFO] train episode 1137: winner = 1, steps = 4\n",
      "01:55:46 [INFO] train episode 1138: winner = 1, steps = 4\n",
      "01:55:55 [INFO] train episode 1139: winner = 1, steps = 6\n",
      "01:56:28 [INFO] train episode 1140: winner = 1, steps = 6\n",
      "01:56:36 [INFO] train episode 1141: winner = 1, steps = 6\n",
      "01:56:44 [INFO] train episode 1142: winner = 1, steps = 4\n",
      "01:57:00 [INFO] train episode 1143: winner = 0, steps = 8\n",
      "01:57:37 [INFO] train episode 1144: winner = 1, steps = 8\n",
      "01:57:51 [INFO] train episode 1145: winner = 0, steps = 8\n",
      "01:58:04 [INFO] train episode 1146: winner = -1, steps = 5\n",
      "01:58:07 [INFO] train episode 1147: winner = 1, steps = 4\n",
      "01:58:23 [INFO] train episode 1148: winner = 1, steps = 6\n",
      "01:58:24 [INFO] train episode 1149: winner = -1, steps = 5\n",
      "01:58:29 [INFO] train episode 1150: winner = 1, steps = 6\n",
      "01:58:30 [INFO] train episode 1151: winner = 1, steps = 6\n",
      "01:58:34 [INFO] train episode 1152: winner = 0, steps = 8\n",
      "01:58:41 [INFO] train episode 1153: winner = -1, steps = 5\n",
      "01:58:47 [INFO] train episode 1154: winner = 1, steps = 4\n",
      "01:58:47 [INFO] train episode 1155: winner = 1, steps = 4\n",
      "01:58:49 [INFO] train episode 1156: winner = 1, steps = 4\n",
      "01:58:54 [INFO] train episode 1157: winner = 1, steps = 4\n",
      "01:58:57 [INFO] train episode 1158: winner = 0, steps = 8\n",
      "01:59:02 [INFO] train episode 1159: winner = 1, steps = 6\n",
      "01:59:04 [INFO] train episode 1160: winner = 0, steps = 8\n",
      "01:59:07 [INFO] train episode 1161: winner = -1, steps = 7\n",
      "01:59:10 [INFO] train episode 1162: winner = 0, steps = 8\n",
      "01:59:14 [INFO] train episode 1163: winner = 1, steps = 6\n",
      "01:59:15 [INFO] train episode 1164: winner = 1, steps = 8\n",
      "01:59:20 [INFO] train episode 1165: winner = -1, steps = 5\n",
      "01:59:22 [INFO] train episode 1166: winner = 1, steps = 6\n",
      "01:59:23 [INFO] train episode 1167: winner = 0, steps = 8\n",
      "01:59:24 [INFO] train episode 1168: winner = -1, steps = 5\n",
      "01:59:25 [INFO] train episode 1169: winner = 1, steps = 6\n",
      "01:59:34 [INFO] train episode 1170: winner = 0, steps = 8\n",
      "01:59:34 [INFO] train episode 1171: winner = -1, steps = 5\n",
      "01:59:44 [INFO] train episode 1172: winner = 0, steps = 8\n",
      "01:59:45 [INFO] train episode 1173: winner = 1, steps = 4\n",
      "01:59:51 [INFO] train episode 1174: winner = 0, steps = 8\n",
      "01:59:52 [INFO] train episode 1175: winner = 1, steps = 6\n",
      "02:00:02 [INFO] train episode 1176: winner = 0, steps = 8\n",
      "02:00:07 [INFO] train episode 1177: winner = 0, steps = 8\n",
      "02:00:07 [INFO] train episode 1178: winner = 1, steps = 4\n",
      "02:00:10 [INFO] train episode 1179: winner = 0, steps = 8\n",
      "02:00:10 [INFO] train episode 1180: winner = 1, steps = 6\n",
      "02:00:12 [INFO] train episode 1181: winner = 1, steps = 4\n",
      "02:00:17 [INFO] train episode 1182: winner = -1, steps = 7\n",
      "02:00:19 [INFO] train episode 1183: winner = 0, steps = 8\n",
      "02:00:19 [INFO] train episode 1184: winner = 1, steps = 4\n",
      "02:00:19 [INFO] train episode 1185: winner = 1, steps = 6\n",
      "02:00:23 [INFO] train episode 1186: winner = 1, steps = 4\n",
      "02:00:26 [INFO] train episode 1187: winner = 1, steps = 4\n",
      "02:00:27 [INFO] train episode 1188: winner = 0, steps = 8\n",
      "02:00:29 [INFO] train episode 1189: winner = 1, steps = 4\n",
      "02:00:32 [INFO] train episode 1190: winner = 0, steps = 8\n",
      "02:00:36 [INFO] train episode 1191: winner = -1, steps = 7\n",
      "02:00:37 [INFO] train episode 1192: winner = 1, steps = 6\n",
      "02:00:37 [INFO] train episode 1193: winner = 1, steps = 6\n",
      "02:00:43 [INFO] train episode 1194: winner = 0, steps = 8\n",
      "02:00:46 [INFO] train episode 1195: winner = -1, steps = 5\n",
      "02:00:49 [INFO] train episode 1196: winner = 0, steps = 8\n",
      "02:00:50 [INFO] train episode 1197: winner = 1, steps = 6\n",
      "02:00:53 [INFO] train episode 1198: winner = 1, steps = 4\n",
      "02:00:55 [INFO] train episode 1199: winner = 0, steps = 8\n",
      "02:00:58 [INFO] train episode 1200: winner = 1, steps = 6\n",
      "02:00:58 [INFO] train episode 1201: winner = 1, steps = 6\n",
      "02:01:00 [INFO] train episode 1202: winner = 1, steps = 4\n",
      "02:01:03 [INFO] train episode 1203: winner = 0, steps = 8\n",
      "02:01:06 [INFO] train episode 1204: winner = 1, steps = 8\n",
      "02:01:08 [INFO] train episode 1205: winner = 1, steps = 6\n",
      "02:01:09 [INFO] train episode 1206: winner = 1, steps = 4\n",
      "02:01:09 [INFO] train episode 1207: winner = 0, steps = 8\n",
      "02:01:14 [INFO] train episode 1208: winner = 1, steps = 6\n",
      "02:01:14 [INFO] train episode 1209: winner = 1, steps = 6\n",
      "02:01:16 [INFO] train episode 1210: winner = 1, steps = 6\n",
      "02:01:17 [INFO] train episode 1211: winner = -1, steps = 5\n",
      "02:01:18 [INFO] train episode 1212: winner = 1, steps = 6\n",
      "02:01:20 [INFO] train episode 1213: winner = 1, steps = 8\n",
      "02:01:21 [INFO] train episode 1214: winner = 1, steps = 6\n",
      "02:01:23 [INFO] train episode 1215: winner = 1, steps = 4\n",
      "02:01:24 [INFO] train episode 1216: winner = 1, steps = 4\n",
      "02:01:30 [INFO] train episode 1217: winner = 1, steps = 6\n",
      "02:01:33 [INFO] train episode 1218: winner = 1, steps = 6\n",
      "02:01:33 [INFO] train episode 1219: winner = 1, steps = 6\n",
      "02:01:34 [INFO] train episode 1220: winner = -1, steps = 7\n",
      "02:01:35 [INFO] train episode 1221: winner = 1, steps = 8\n",
      "02:01:37 [INFO] train episode 1222: winner = 0, steps = 8\n",
      "02:01:37 [INFO] train episode 1223: winner = 1, steps = 6\n",
      "02:01:37 [INFO] train episode 1224: winner = 0, steps = 8\n",
      "02:01:39 [INFO] train episode 1225: winner = 1, steps = 4\n",
      "02:01:43 [INFO] train episode 1226: winner = 0, steps = 8\n",
      "02:01:43 [INFO] train episode 1227: winner = 1, steps = 4\n",
      "02:01:43 [INFO] train episode 1228: winner = 0, steps = 8\n",
      "02:01:46 [INFO] train episode 1229: winner = 1, steps = 6\n",
      "02:01:46 [INFO] train episode 1230: winner = 1, steps = 6\n",
      "02:01:49 [INFO] train episode 1231: winner = 1, steps = 6\n",
      "02:01:49 [INFO] train episode 1232: winner = 1, steps = 4\n",
      "02:01:49 [INFO] train episode 1233: winner = 1, steps = 6\n",
      "02:01:50 [INFO] train episode 1234: winner = 0, steps = 8\n",
      "02:01:50 [INFO] train episode 1235: winner = 1, steps = 4\n",
      "02:01:51 [INFO] train episode 1236: winner = 0, steps = 8\n",
      "02:01:52 [INFO] train episode 1237: winner = 1, steps = 4\n",
      "02:01:52 [INFO] train episode 1238: winner = 0, steps = 8\n",
      "02:01:55 [INFO] train episode 1239: winner = -1, steps = 5\n",
      "02:01:57 [INFO] train episode 1240: winner = 1, steps = 6\n",
      "02:01:57 [INFO] train episode 1241: winner = 0, steps = 8\n",
      "02:02:00 [INFO] train episode 1242: winner = 1, steps = 6\n",
      "02:02:00 [INFO] train episode 1243: winner = 1, steps = 6\n",
      "02:02:01 [INFO] train episode 1244: winner = 1, steps = 6\n",
      "02:02:02 [INFO] train episode 1245: winner = 1, steps = 4\n",
      "02:02:05 [INFO] train episode 1246: winner = -1, steps = 5\n",
      "02:02:06 [INFO] train episode 1247: winner = 1, steps = 6\n",
      "02:02:06 [INFO] train episode 1248: winner = 1, steps = 4\n",
      "02:02:06 [INFO] train episode 1249: winner = 1, steps = 4\n",
      "02:02:08 [INFO] train episode 1250: winner = 0, steps = 8\n",
      "02:02:12 [INFO] train episode 1251: winner = 0, steps = 8\n",
      "02:02:15 [INFO] train episode 1252: winner = 1, steps = 6\n",
      "02:02:16 [INFO] train episode 1253: winner = -1, steps = 5\n",
      "02:02:17 [INFO] train episode 1254: winner = 1, steps = 4\n",
      "02:02:19 [INFO] train episode 1255: winner = 0, steps = 8\n",
      "02:02:19 [INFO] train episode 1256: winner = 1, steps = 4\n",
      "02:02:19 [INFO] train episode 1257: winner = 0, steps = 8\n",
      "02:02:20 [INFO] train episode 1258: winner = 1, steps = 4\n",
      "02:02:20 [INFO] train episode 1259: winner = 1, steps = 6\n",
      "02:02:21 [INFO] train episode 1260: winner = 0, steps = 8\n",
      "02:02:22 [INFO] train episode 1261: winner = 0, steps = 8\n",
      "02:02:24 [INFO] train episode 1262: winner = 1, steps = 8\n",
      "02:02:29 [INFO] train episode 1263: winner = 1, steps = 6\n",
      "02:02:29 [INFO] test episode 1263:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:02:55 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "02:03:16 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "o++\n",
      "+++\n",
      "02:03:30 [INFO] step 2：player 1, action (2, 0)\n",
      "++x\n",
      "o++\n",
      "o++\n",
      "02:03:32 [INFO] step 3：player -1, action (0, 0)\n",
      "x+x\n",
      "o++\n",
      "o++\n",
      "02:03:32 [INFO] step 4：player 1, action (2, 2)\n",
      "x+x\n",
      "o++\n",
      "o+o\n",
      "02:03:33 [INFO] step 5：player -1, action (0, 1)\n",
      "xxx\n",
      "o++\n",
      "o+o\n",
      "02:03:33 [INFO] test episode 1263: winner = -1, steps = 5\n",
      "02:04:23 [INFO] train episode 1264: winner = -1, steps = 7\n",
      "02:05:12 [INFO] train episode 1265: winner = 1, steps = 6\n",
      "02:05:47 [INFO] train episode 1266: winner = 1, steps = 6\n",
      "02:05:56 [INFO] train episode 1267: winner = 1, steps = 4\n",
      "02:06:25 [INFO] train episode 1268: winner = 0, steps = 8\n",
      "02:06:35 [INFO] train episode 1269: winner = 0, steps = 8\n",
      "02:06:42 [INFO] train episode 1270: winner = 0, steps = 8\n",
      "02:07:15 [INFO] train episode 1271: winner = 0, steps = 8\n",
      "02:07:41 [INFO] train episode 1272: winner = 1, steps = 6\n",
      "02:07:48 [INFO] train episode 1273: winner = 1, steps = 6\n",
      "02:08:08 [INFO] train episode 1274: winner = 1, steps = 4\n",
      "02:08:26 [INFO] train episode 1275: winner = 1, steps = 6\n",
      "02:08:38 [INFO] train episode 1276: winner = 0, steps = 8\n",
      "02:08:48 [INFO] train episode 1277: winner = 0, steps = 8\n",
      "02:08:52 [INFO] train episode 1278: winner = 0, steps = 8\n",
      "02:09:10 [INFO] train episode 1279: winner = 1, steps = 4\n",
      "02:09:26 [INFO] train episode 1280: winner = -1, steps = 5\n",
      "02:09:33 [INFO] train episode 1281: winner = 0, steps = 8\n",
      "02:09:34 [INFO] train episode 1282: winner = 1, steps = 6\n",
      "02:09:35 [INFO] train episode 1283: winner = 1, steps = 6\n",
      "02:09:40 [INFO] train episode 1284: winner = 1, steps = 4\n",
      "02:09:52 [INFO] train episode 1285: winner = 1, steps = 6\n",
      "02:10:11 [INFO] train episode 1286: winner = 1, steps = 4\n",
      "02:10:14 [INFO] train episode 1287: winner = 1, steps = 6\n",
      "02:10:25 [INFO] train episode 1288: winner = 1, steps = 4\n",
      "02:10:37 [INFO] train episode 1289: winner = 1, steps = 6\n",
      "02:10:39 [INFO] train episode 1290: winner = 1, steps = 6\n",
      "02:10:39 [INFO] train episode 1291: winner = -1, steps = 5\n",
      "02:10:48 [INFO] train episode 1292: winner = -1, steps = 5\n",
      "02:10:52 [INFO] train episode 1293: winner = 1, steps = 6\n",
      "02:10:53 [INFO] train episode 1294: winner = 1, steps = 4\n",
      "02:11:01 [INFO] train episode 1295: winner = 1, steps = 6\n",
      "02:11:02 [INFO] train episode 1296: winner = 1, steps = 6\n",
      "02:11:05 [INFO] train episode 1297: winner = 1, steps = 4\n",
      "02:11:14 [INFO] train episode 1298: winner = 0, steps = 8\n",
      "02:11:17 [INFO] train episode 1299: winner = 1, steps = 4\n",
      "02:11:25 [INFO] train episode 1300: winner = 1, steps = 4\n",
      "02:11:30 [INFO] train episode 1301: winner = 1, steps = 6\n",
      "02:11:35 [INFO] train episode 1302: winner = 0, steps = 8\n",
      "02:11:35 [INFO] train episode 1303: winner = 1, steps = 6\n",
      "02:11:37 [INFO] train episode 1304: winner = 1, steps = 4\n",
      "02:11:41 [INFO] train episode 1305: winner = 0, steps = 8\n",
      "02:11:42 [INFO] train episode 1306: winner = 1, steps = 6\n",
      "02:11:45 [INFO] train episode 1307: winner = 0, steps = 8\n",
      "02:11:48 [INFO] train episode 1308: winner = 1, steps = 6\n",
      "02:11:58 [INFO] train episode 1309: winner = 0, steps = 8\n",
      "02:12:02 [INFO] train episode 1310: winner = 0, steps = 8\n",
      "02:12:03 [INFO] train episode 1311: winner = 1, steps = 4\n",
      "02:12:11 [INFO] train episode 1312: winner = 1, steps = 8\n",
      "02:12:18 [INFO] train episode 1313: winner = 1, steps = 6\n",
      "02:12:18 [INFO] train episode 1314: winner = 1, steps = 6\n",
      "02:12:18 [INFO] train episode 1315: winner = 1, steps = 4\n",
      "02:12:22 [INFO] train episode 1316: winner = 1, steps = 4\n",
      "02:12:22 [INFO] train episode 1317: winner = 1, steps = 6\n",
      "02:12:31 [INFO] train episode 1318: winner = 0, steps = 8\n",
      "02:12:35 [INFO] train episode 1319: winner = 1, steps = 4\n",
      "02:12:35 [INFO] train episode 1320: winner = 1, steps = 6\n",
      "02:12:39 [INFO] train episode 1321: winner = 1, steps = 6\n",
      "02:12:41 [INFO] train episode 1322: winner = 1, steps = 6\n",
      "02:12:46 [INFO] train episode 1323: winner = 1, steps = 8\n",
      "02:12:47 [INFO] train episode 1324: winner = 1, steps = 6\n",
      "02:12:51 [INFO] train episode 1325: winner = 1, steps = 6\n",
      "02:12:55 [INFO] train episode 1326: winner = -1, steps = 5\n",
      "02:12:58 [INFO] train episode 1327: winner = 0, steps = 8\n",
      "02:13:00 [INFO] train episode 1328: winner = 1, steps = 4\n",
      "02:13:01 [INFO] train episode 1329: winner = -1, steps = 5\n",
      "02:13:06 [INFO] train episode 1330: winner = -1, steps = 5\n",
      "02:13:06 [INFO] train episode 1331: winner = 0, steps = 8\n",
      "02:13:07 [INFO] train episode 1332: winner = 1, steps = 4\n",
      "02:13:09 [INFO] train episode 1333: winner = -1, steps = 7\n",
      "02:13:09 [INFO] train episode 1334: winner = 0, steps = 8\n",
      "02:13:12 [INFO] train episode 1335: winner = 1, steps = 6\n",
      "02:13:16 [INFO] train episode 1336: winner = 0, steps = 8\n",
      "02:13:18 [INFO] train episode 1337: winner = 1, steps = 4\n",
      "02:13:19 [INFO] train episode 1338: winner = 1, steps = 6\n",
      "02:13:20 [INFO] train episode 1339: winner = 0, steps = 8\n",
      "02:13:21 [INFO] train episode 1340: winner = 1, steps = 6\n",
      "02:13:22 [INFO] train episode 1341: winner = 1, steps = 6\n",
      "02:13:22 [INFO] train episode 1342: winner = 1, steps = 6\n",
      "02:13:24 [INFO] train episode 1343: winner = -1, steps = 7\n",
      "02:13:25 [INFO] train episode 1344: winner = 0, steps = 8\n",
      "02:13:25 [INFO] train episode 1345: winner = 1, steps = 4\n",
      "02:13:25 [INFO] train episode 1346: winner = 1, steps = 4\n",
      "02:13:27 [INFO] train episode 1347: winner = 0, steps = 8\n",
      "02:13:31 [INFO] train episode 1348: winner = 1, steps = 6\n",
      "02:13:33 [INFO] train episode 1349: winner = 1, steps = 4\n",
      "02:13:33 [INFO] train episode 1350: winner = 0, steps = 8\n",
      "02:13:36 [INFO] train episode 1351: winner = 0, steps = 8\n",
      "02:13:37 [INFO] train episode 1352: winner = 1, steps = 4\n",
      "02:13:41 [INFO] train episode 1353: winner = 0, steps = 8\n",
      "02:13:44 [INFO] train episode 1354: winner = 0, steps = 8\n",
      "02:13:48 [INFO] train episode 1355: winner = 1, steps = 4\n",
      "02:13:51 [INFO] train episode 1356: winner = 1, steps = 4\n",
      "02:13:53 [INFO] train episode 1357: winner = 0, steps = 8\n",
      "02:13:55 [INFO] train episode 1358: winner = 1, steps = 6\n",
      "02:13:59 [INFO] train episode 1359: winner = 0, steps = 8\n",
      "02:13:59 [INFO] train episode 1360: winner = 1, steps = 6\n",
      "02:14:01 [INFO] train episode 1361: winner = 1, steps = 6\n",
      "02:14:04 [INFO] train episode 1362: winner = 0, steps = 8\n",
      "02:14:06 [INFO] train episode 1363: winner = 0, steps = 8\n",
      "02:14:08 [INFO] train episode 1364: winner = 1, steps = 4\n",
      "02:14:08 [INFO] train episode 1365: winner = 1, steps = 6\n",
      "02:14:10 [INFO] train episode 1366: winner = 1, steps = 8\n",
      "02:14:11 [INFO] train episode 1367: winner = 1, steps = 4\n",
      "02:14:13 [INFO] train episode 1368: winner = 1, steps = 6\n",
      "02:14:13 [INFO] train episode 1369: winner = 1, steps = 4\n",
      "02:14:14 [INFO] train episode 1370: winner = 1, steps = 4\n",
      "02:14:15 [INFO] train episode 1371: winner = 0, steps = 8\n",
      "02:14:19 [INFO] train episode 1372: winner = -1, steps = 7\n",
      "02:14:23 [INFO] train episode 1373: winner = 1, steps = 6\n",
      "02:14:28 [INFO] train episode 1374: winner = 0, steps = 8\n",
      "02:14:33 [INFO] train episode 1375: winner = 1, steps = 6\n",
      "02:14:34 [INFO] train episode 1376: winner = 0, steps = 8\n",
      "02:14:36 [INFO] train episode 1377: winner = 1, steps = 6\n",
      "02:14:37 [INFO] train episode 1378: winner = 1, steps = 4\n",
      "02:14:37 [INFO] train episode 1379: winner = 1, steps = 6\n",
      "02:14:38 [INFO] train episode 1380: winner = 1, steps = 4\n",
      "02:14:38 [INFO] train episode 1381: winner = -1, steps = 5\n",
      "02:14:39 [INFO] train episode 1382: winner = 1, steps = 6\n",
      "02:14:40 [INFO] train episode 1383: winner = 0, steps = 8\n",
      "02:14:41 [INFO] train episode 1384: winner = 1, steps = 4\n",
      "02:14:42 [INFO] train episode 1385: winner = 1, steps = 6\n",
      "02:14:45 [INFO] train episode 1386: winner = 1, steps = 6\n",
      "02:14:50 [INFO] train episode 1387: winner = 1, steps = 8\n",
      "02:14:52 [INFO] train episode 1388: winner = 1, steps = 8\n",
      "02:14:53 [INFO] train episode 1389: winner = -1, steps = 7\n",
      "02:14:56 [INFO] train episode 1390: winner = 1, steps = 6\n",
      "02:14:57 [INFO] train episode 1391: winner = 0, steps = 8\n",
      "02:15:00 [INFO] train episode 1392: winner = 0, steps = 8\n",
      "02:15:02 [INFO] train episode 1393: winner = 1, steps = 6\n",
      "02:15:06 [INFO] train episode 1394: winner = 1, steps = 6\n",
      "02:15:08 [INFO] train episode 1395: winner = 0, steps = 8\n",
      "02:15:11 [INFO] train episode 1396: winner = 1, steps = 6\n",
      "02:15:11 [INFO] train episode 1397: winner = 1, steps = 6\n",
      "02:15:13 [INFO] train episode 1398: winner = 1, steps = 4\n",
      "02:15:14 [INFO] train episode 1399: winner = 1, steps = 6\n",
      "02:15:15 [INFO] train episode 1400: winner = -1, steps = 7\n",
      "02:15:15 [INFO] train episode 1401: winner = -1, steps = 7\n",
      "02:15:17 [INFO] train episode 1402: winner = 1, steps = 4\n",
      "02:15:17 [INFO] train episode 1403: winner = 1, steps = 4\n",
      "02:15:18 [INFO] train episode 1404: winner = 1, steps = 4\n",
      "02:15:18 [INFO] test episode 1404:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:15:43 [INFO] step 0：player 1, action (0, 0)\n",
      "o++\n",
      "+++\n",
      "+++\n",
      "02:16:06 [INFO] step 1：player -1, action (0, 1)\n",
      "ox+\n",
      "+++\n",
      "+++\n",
      "02:16:24 [INFO] step 2：player 1, action (1, 0)\n",
      "ox+\n",
      "o++\n",
      "+++\n",
      "02:16:27 [INFO] step 3：player -1, action (2, 0)\n",
      "ox+\n",
      "o++\n",
      "x++\n",
      "02:16:28 [INFO] step 4：player 1, action (1, 1)\n",
      "ox+\n",
      "oo+\n",
      "x++\n",
      "02:16:28 [INFO] step 5：player -1, action (2, 1)\n",
      "ox+\n",
      "oo+\n",
      "xx+\n",
      "02:16:28 [INFO] step 6：player 1, action (1, 2)\n",
      "ox+\n",
      "ooo\n",
      "xx+\n",
      "02:16:28 [INFO] test episode 1404: winner = 1, steps = 6\n",
      "02:17:04 [INFO] train episode 1405: winner = 1, steps = 6\n",
      "02:17:33 [INFO] train episode 1406: winner = 1, steps = 6\n",
      "02:18:27 [INFO] train episode 1407: winner = -1, steps = 5\n",
      "02:18:31 [INFO] train episode 1408: winner = 1, steps = 6\n",
      "02:19:05 [INFO] train episode 1409: winner = -1, steps = 5\n",
      "02:19:34 [INFO] train episode 1410: winner = 1, steps = 6\n",
      "02:19:44 [INFO] train episode 1411: winner = 1, steps = 6\n",
      "02:20:08 [INFO] train episode 1412: winner = -1, steps = 5\n",
      "02:20:13 [INFO] train episode 1413: winner = 1, steps = 4\n",
      "02:20:33 [INFO] train episode 1414: winner = 1, steps = 6\n",
      "02:20:45 [INFO] train episode 1415: winner = 0, steps = 8\n",
      "02:21:02 [INFO] train episode 1416: winner = 1, steps = 4\n",
      "02:21:07 [INFO] train episode 1417: winner = 1, steps = 6\n",
      "02:21:07 [INFO] train episode 1418: winner = 1, steps = 4\n",
      "02:21:18 [INFO] train episode 1419: winner = 0, steps = 8\n",
      "02:21:30 [INFO] train episode 1420: winner = 1, steps = 8\n",
      "02:22:06 [INFO] train episode 1421: winner = 1, steps = 8\n",
      "02:22:21 [INFO] train episode 1422: winner = 1, steps = 6\n",
      "02:22:26 [INFO] train episode 1423: winner = 0, steps = 8\n",
      "02:22:26 [INFO] train episode 1424: winner = 1, steps = 6\n",
      "02:22:31 [INFO] train episode 1425: winner = 0, steps = 8\n",
      "02:22:34 [INFO] train episode 1426: winner = 1, steps = 4\n",
      "02:22:41 [INFO] train episode 1427: winner = 0, steps = 8\n",
      "02:22:51 [INFO] train episode 1428: winner = -1, steps = 5\n",
      "02:22:56 [INFO] train episode 1429: winner = -1, steps = 7\n",
      "02:22:59 [INFO] train episode 1430: winner = 1, steps = 4\n",
      "02:23:05 [INFO] train episode 1431: winner = 1, steps = 6\n",
      "02:23:28 [INFO] train episode 1432: winner = 0, steps = 8\n",
      "02:23:30 [INFO] train episode 1433: winner = 1, steps = 4\n",
      "02:23:43 [INFO] train episode 1434: winner = 1, steps = 8\n",
      "02:23:43 [INFO] train episode 1435: winner = 1, steps = 4\n",
      "02:23:44 [INFO] train episode 1436: winner = 1, steps = 6\n",
      "02:23:56 [INFO] train episode 1437: winner = -1, steps = 7\n",
      "02:24:06 [INFO] train episode 1438: winner = -1, steps = 5\n",
      "02:24:09 [INFO] train episode 1439: winner = 1, steps = 6\n",
      "02:24:18 [INFO] train episode 1440: winner = 0, steps = 8\n",
      "02:24:21 [INFO] train episode 1441: winner = -1, steps = 5\n",
      "02:24:30 [INFO] train episode 1442: winner = -1, steps = 5\n",
      "02:24:31 [INFO] train episode 1443: winner = 1, steps = 6\n",
      "02:24:32 [INFO] train episode 1444: winner = 1, steps = 4\n",
      "02:24:33 [INFO] train episode 1445: winner = 1, steps = 4\n",
      "02:24:35 [INFO] train episode 1446: winner = 1, steps = 6\n",
      "02:24:39 [INFO] train episode 1447: winner = 0, steps = 8\n",
      "02:24:41 [INFO] train episode 1448: winner = 1, steps = 4\n",
      "02:24:46 [INFO] train episode 1449: winner = 0, steps = 8\n",
      "02:24:47 [INFO] train episode 1450: winner = 1, steps = 4\n",
      "02:24:48 [INFO] train episode 1451: winner = 1, steps = 4\n",
      "02:24:51 [INFO] train episode 1452: winner = 1, steps = 6\n",
      "02:24:56 [INFO] train episode 1453: winner = 0, steps = 8\n",
      "02:24:59 [INFO] train episode 1454: winner = 1, steps = 4\n",
      "02:25:10 [INFO] train episode 1455: winner = 0, steps = 8\n",
      "02:25:13 [INFO] train episode 1456: winner = 1, steps = 6\n",
      "02:25:17 [INFO] train episode 1457: winner = 1, steps = 4\n",
      "02:25:22 [INFO] train episode 1458: winner = -1, steps = 7\n",
      "02:25:27 [INFO] train episode 1459: winner = 0, steps = 8\n",
      "02:25:38 [INFO] train episode 1460: winner = 1, steps = 6\n",
      "02:25:42 [INFO] train episode 1461: winner = 1, steps = 6\n",
      "02:25:42 [INFO] train episode 1462: winner = 1, steps = 6\n",
      "02:25:43 [INFO] train episode 1463: winner = 1, steps = 4\n",
      "02:25:45 [INFO] train episode 1464: winner = -1, steps = 5\n",
      "02:25:50 [INFO] train episode 1465: winner = -1, steps = 5\n",
      "02:25:50 [INFO] train episode 1466: winner = 1, steps = 6\n",
      "02:25:51 [INFO] train episode 1467: winner = 1, steps = 6\n",
      "02:25:53 [INFO] train episode 1468: winner = 0, steps = 8\n",
      "02:25:57 [INFO] train episode 1469: winner = 0, steps = 8\n",
      "02:25:59 [INFO] train episode 1470: winner = 0, steps = 8\n",
      "02:26:02 [INFO] train episode 1471: winner = 0, steps = 8\n",
      "02:26:05 [INFO] train episode 1472: winner = 0, steps = 8\n",
      "02:26:06 [INFO] train episode 1473: winner = 0, steps = 8\n",
      "02:26:06 [INFO] train episode 1474: winner = 1, steps = 4\n",
      "02:26:14 [INFO] train episode 1475: winner = 0, steps = 8\n",
      "02:26:14 [INFO] train episode 1476: winner = 1, steps = 6\n",
      "02:26:14 [INFO] train episode 1477: winner = 1, steps = 4\n",
      "02:26:21 [INFO] train episode 1478: winner = 1, steps = 6\n",
      "02:26:21 [INFO] train episode 1479: winner = 1, steps = 6\n",
      "02:26:23 [INFO] train episode 1480: winner = 0, steps = 8\n",
      "02:26:27 [INFO] train episode 1481: winner = 0, steps = 8\n",
      "02:26:33 [INFO] train episode 1482: winner = 1, steps = 6\n",
      "02:26:35 [INFO] train episode 1483: winner = 1, steps = 6\n",
      "02:26:35 [INFO] train episode 1484: winner = 0, steps = 8\n",
      "02:26:36 [INFO] train episode 1485: winner = 0, steps = 8\n",
      "02:26:37 [INFO] train episode 1486: winner = -1, steps = 5\n",
      "02:26:38 [INFO] train episode 1487: winner = 1, steps = 4\n",
      "02:26:42 [INFO] train episode 1488: winner = 1, steps = 6\n",
      "02:26:45 [INFO] train episode 1489: winner = 1, steps = 6\n",
      "02:26:48 [INFO] train episode 1490: winner = 1, steps = 4\n",
      "02:26:50 [INFO] train episode 1491: winner = -1, steps = 5\n",
      "02:26:54 [INFO] train episode 1492: winner = 0, steps = 8\n",
      "02:26:55 [INFO] train episode 1493: winner = 1, steps = 4\n",
      "02:26:55 [INFO] train episode 1494: winner = 1, steps = 4\n",
      "02:26:55 [INFO] train episode 1495: winner = 1, steps = 4\n",
      "02:26:56 [INFO] train episode 1496: winner = 1, steps = 4\n",
      "02:26:57 [INFO] train episode 1497: winner = -1, steps = 7\n",
      "02:26:58 [INFO] train episode 1498: winner = 1, steps = 6\n",
      "02:26:59 [INFO] train episode 1499: winner = 1, steps = 6\n",
      "02:26:59 [INFO] train episode 1500: winner = 1, steps = 6\n",
      "02:27:01 [INFO] train episode 1501: winner = 0, steps = 8\n",
      "02:27:03 [INFO] train episode 1502: winner = 0, steps = 8\n",
      "02:27:03 [INFO] train episode 1503: winner = 1, steps = 4\n",
      "02:27:03 [INFO] train episode 1504: winner = 1, steps = 6\n",
      "02:27:03 [INFO] train episode 1505: winner = 1, steps = 4\n",
      "02:27:04 [INFO] train episode 1506: winner = 1, steps = 6\n",
      "02:27:05 [INFO] train episode 1507: winner = 1, steps = 6\n",
      "02:27:06 [INFO] train episode 1508: winner = 0, steps = 8\n",
      "02:27:06 [INFO] train episode 1509: winner = 1, steps = 4\n",
      "02:27:08 [INFO] train episode 1510: winner = 0, steps = 8\n",
      "02:27:13 [INFO] train episode 1511: winner = 0, steps = 8\n",
      "02:27:13 [INFO] train episode 1512: winner = 1, steps = 6\n",
      "02:27:13 [INFO] train episode 1513: winner = 1, steps = 4\n",
      "02:27:15 [INFO] train episode 1514: winner = 1, steps = 4\n",
      "02:27:15 [INFO] train episode 1515: winner = 1, steps = 4\n",
      "02:27:17 [INFO] train episode 1516: winner = 1, steps = 6\n",
      "02:27:17 [INFO] train episode 1517: winner = 0, steps = 8\n",
      "02:27:18 [INFO] train episode 1518: winner = -1, steps = 7\n",
      "02:27:19 [INFO] train episode 1519: winner = -1, steps = 5\n",
      "02:27:21 [INFO] train episode 1520: winner = 1, steps = 8\n",
      "02:27:21 [INFO] train episode 1521: winner = -1, steps = 5\n",
      "02:27:24 [INFO] train episode 1522: winner = 1, steps = 4\n",
      "02:27:26 [INFO] train episode 1523: winner = 0, steps = 8\n",
      "02:27:30 [INFO] train episode 1524: winner = -1, steps = 5\n",
      "02:27:33 [INFO] train episode 1525: winner = 0, steps = 8\n",
      "02:27:35 [INFO] train episode 1526: winner = 0, steps = 8\n",
      "02:27:36 [INFO] train episode 1527: winner = 1, steps = 6\n",
      "02:27:37 [INFO] train episode 1528: winner = 0, steps = 8\n",
      "02:27:38 [INFO] train episode 1529: winner = 1, steps = 6\n",
      "02:27:40 [INFO] train episode 1530: winner = -1, steps = 5\n",
      "02:27:42 [INFO] train episode 1531: winner = 1, steps = 8\n",
      "02:27:42 [INFO] train episode 1532: winner = 1, steps = 6\n",
      "02:27:43 [INFO] train episode 1533: winner = -1, steps = 7\n",
      "02:27:45 [INFO] train episode 1534: winner = 1, steps = 6\n",
      "02:27:46 [INFO] train episode 1535: winner = 1, steps = 4\n",
      "02:27:47 [INFO] train episode 1536: winner = 0, steps = 8\n",
      "02:27:50 [INFO] train episode 1537: winner = 1, steps = 8\n",
      "02:27:50 [INFO] train episode 1538: winner = 1, steps = 4\n",
      "02:27:51 [INFO] train episode 1539: winner = 1, steps = 4\n",
      "02:27:55 [INFO] train episode 1540: winner = 1, steps = 6\n",
      "02:27:55 [INFO] train episode 1541: winner = 0, steps = 8\n",
      "02:27:55 [INFO] train episode 1542: winner = 1, steps = 6\n",
      "02:27:56 [INFO] train episode 1543: winner = 0, steps = 8\n",
      "02:27:59 [INFO] train episode 1544: winner = 0, steps = 8\n",
      "02:28:00 [INFO] train episode 1545: winner = 1, steps = 6\n",
      "02:28:00 [INFO] test episode 1545:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:28:25 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "02:28:45 [INFO] step 1：player -1, action (1, 1)\n",
      "+o+\n",
      "+x+\n",
      "+++\n",
      "02:29:02 [INFO] step 2：player 1, action (2, 0)\n",
      "+o+\n",
      "+x+\n",
      "o++\n",
      "02:29:13 [INFO] step 3：player -1, action (0, 0)\n",
      "xo+\n",
      "+x+\n",
      "o++\n",
      "02:29:14 [INFO] step 4：player 1, action (2, 2)\n",
      "xo+\n",
      "+x+\n",
      "o+o\n",
      "02:29:14 [INFO] step 5：player -1, action (2, 1)\n",
      "xo+\n",
      "+x+\n",
      "oxo\n",
      "02:29:14 [INFO] step 6：player 1, action (1, 0)\n",
      "xo+\n",
      "ox+\n",
      "oxo\n",
      "02:29:15 [INFO] step 7：player -1, action (1, 2)\n",
      "xo+\n",
      "oxx\n",
      "oxo\n",
      "02:29:15 [INFO] step 8：player 1, action (0, 2)\n",
      "xoo\n",
      "oxx\n",
      "oxo\n",
      "02:29:15 [INFO] test episode 1545: winner = 0, steps = 8\n",
      "02:29:57 [INFO] train episode 1546: winner = 1, steps = 6\n",
      "02:30:48 [INFO] train episode 1547: winner = 1, steps = 8\n",
      "02:31:01 [INFO] train episode 1548: winner = 1, steps = 6\n",
      "02:31:22 [INFO] train episode 1549: winner = 1, steps = 6\n",
      "02:31:45 [INFO] train episode 1550: winner = 1, steps = 4\n",
      "02:32:07 [INFO] train episode 1551: winner = 0, steps = 8\n",
      "02:32:09 [INFO] train episode 1552: winner = 0, steps = 8\n",
      "02:32:18 [INFO] train episode 1553: winner = 1, steps = 6\n",
      "02:32:34 [INFO] train episode 1554: winner = 1, steps = 8\n",
      "02:32:42 [INFO] train episode 1555: winner = 1, steps = 4\n",
      "02:32:54 [INFO] train episode 1556: winner = 0, steps = 8\n",
      "02:33:06 [INFO] train episode 1557: winner = 0, steps = 8\n",
      "02:33:14 [INFO] train episode 1558: winner = 0, steps = 8\n",
      "02:33:21 [INFO] train episode 1559: winner = 1, steps = 4\n",
      "02:33:25 [INFO] train episode 1560: winner = 0, steps = 8\n",
      "02:33:37 [INFO] train episode 1561: winner = 0, steps = 8\n",
      "02:33:42 [INFO] train episode 1562: winner = -1, steps = 5\n",
      "02:33:51 [INFO] train episode 1563: winner = 0, steps = 8\n",
      "02:34:21 [INFO] train episode 1564: winner = 1, steps = 6\n",
      "02:34:48 [INFO] train episode 1565: winner = 0, steps = 8\n",
      "02:34:56 [INFO] train episode 1566: winner = 1, steps = 4\n",
      "02:34:58 [INFO] train episode 1567: winner = 0, steps = 8\n",
      "02:35:18 [INFO] train episode 1568: winner = 0, steps = 8\n",
      "02:35:34 [INFO] train episode 1569: winner = 1, steps = 4\n",
      "02:35:38 [INFO] train episode 1570: winner = 0, steps = 8\n",
      "02:35:39 [INFO] train episode 1571: winner = 1, steps = 4\n",
      "02:35:41 [INFO] train episode 1572: winner = 0, steps = 8\n",
      "02:35:44 [INFO] train episode 1573: winner = 1, steps = 6\n",
      "02:35:57 [INFO] train episode 1574: winner = 1, steps = 4\n",
      "02:36:05 [INFO] train episode 1575: winner = 1, steps = 6\n",
      "02:36:07 [INFO] train episode 1576: winner = -1, steps = 7\n",
      "02:36:14 [INFO] train episode 1577: winner = 1, steps = 4\n",
      "02:36:15 [INFO] train episode 1578: winner = 1, steps = 6\n",
      "02:36:21 [INFO] train episode 1579: winner = 1, steps = 4\n",
      "02:36:22 [INFO] train episode 1580: winner = 1, steps = 6\n",
      "02:36:24 [INFO] train episode 1581: winner = 1, steps = 6\n",
      "02:36:27 [INFO] train episode 1582: winner = -1, steps = 7\n",
      "02:36:31 [INFO] train episode 1583: winner = 0, steps = 8\n",
      "02:36:33 [INFO] train episode 1584: winner = 1, steps = 6\n",
      "02:36:35 [INFO] train episode 1585: winner = -1, steps = 7\n",
      "02:36:35 [INFO] train episode 1586: winner = 1, steps = 6\n",
      "02:36:38 [INFO] train episode 1587: winner = 1, steps = 6\n",
      "02:36:38 [INFO] train episode 1588: winner = -1, steps = 7\n",
      "02:36:43 [INFO] train episode 1589: winner = 1, steps = 6\n",
      "02:36:48 [INFO] train episode 1590: winner = 0, steps = 8\n",
      "02:36:52 [INFO] train episode 1591: winner = 0, steps = 8\n",
      "02:37:01 [INFO] train episode 1592: winner = 1, steps = 6\n",
      "02:37:08 [INFO] train episode 1593: winner = 1, steps = 6\n",
      "02:37:09 [INFO] train episode 1594: winner = 0, steps = 8\n",
      "02:37:16 [INFO] train episode 1595: winner = 0, steps = 8\n",
      "02:37:22 [INFO] train episode 1596: winner = 1, steps = 4\n",
      "02:37:23 [INFO] train episode 1597: winner = 0, steps = 8\n",
      "02:37:24 [INFO] train episode 1598: winner = 1, steps = 6\n",
      "02:37:28 [INFO] train episode 1599: winner = 0, steps = 8\n",
      "02:37:29 [INFO] train episode 1600: winner = 1, steps = 4\n",
      "02:37:30 [INFO] train episode 1601: winner = 1, steps = 6\n",
      "02:37:33 [INFO] train episode 1602: winner = 1, steps = 6\n",
      "02:37:36 [INFO] train episode 1603: winner = 1, steps = 6\n",
      "02:37:37 [INFO] train episode 1604: winner = 1, steps = 6\n",
      "02:37:40 [INFO] train episode 1605: winner = 1, steps = 4\n",
      "02:37:50 [INFO] train episode 1606: winner = 1, steps = 8\n",
      "02:37:53 [INFO] train episode 1607: winner = 1, steps = 4\n",
      "02:37:57 [INFO] train episode 1608: winner = 1, steps = 6\n",
      "02:37:57 [INFO] train episode 1609: winner = 1, steps = 6\n",
      "02:37:59 [INFO] train episode 1610: winner = -1, steps = 5\n",
      "02:38:06 [INFO] train episode 1611: winner = 1, steps = 6\n",
      "02:38:08 [INFO] train episode 1612: winner = 1, steps = 6\n",
      "02:38:09 [INFO] train episode 1613: winner = 0, steps = 8\n",
      "02:38:11 [INFO] train episode 1614: winner = -1, steps = 7\n",
      "02:38:15 [INFO] train episode 1615: winner = -1, steps = 7\n",
      "02:38:15 [INFO] train episode 1616: winner = 1, steps = 6\n",
      "02:38:17 [INFO] train episode 1617: winner = 1, steps = 4\n",
      "02:38:18 [INFO] train episode 1618: winner = 1, steps = 6\n",
      "02:38:19 [INFO] train episode 1619: winner = -1, steps = 5\n",
      "02:38:19 [INFO] train episode 1620: winner = 1, steps = 6\n",
      "02:38:20 [INFO] train episode 1621: winner = 1, steps = 4\n",
      "02:38:20 [INFO] train episode 1622: winner = 1, steps = 6\n",
      "02:38:24 [INFO] train episode 1623: winner = 1, steps = 6\n",
      "02:38:24 [INFO] train episode 1624: winner = 1, steps = 6\n",
      "02:38:25 [INFO] train episode 1625: winner = -1, steps = 7\n",
      "02:38:30 [INFO] train episode 1626: winner = 0, steps = 8\n",
      "02:38:32 [INFO] train episode 1627: winner = 1, steps = 6\n",
      "02:38:34 [INFO] train episode 1628: winner = -1, steps = 7\n",
      "02:38:35 [INFO] train episode 1629: winner = 1, steps = 6\n",
      "02:38:39 [INFO] train episode 1630: winner = 1, steps = 6\n",
      "02:38:44 [INFO] train episode 1631: winner = 0, steps = 8\n",
      "02:38:47 [INFO] train episode 1632: winner = 1, steps = 6\n",
      "02:38:48 [INFO] train episode 1633: winner = 1, steps = 6\n",
      "02:38:49 [INFO] train episode 1634: winner = 1, steps = 6\n",
      "02:38:51 [INFO] train episode 1635: winner = 0, steps = 8\n",
      "02:38:53 [INFO] train episode 1636: winner = 0, steps = 8\n",
      "02:38:53 [INFO] train episode 1637: winner = 1, steps = 4\n",
      "02:38:57 [INFO] train episode 1638: winner = -1, steps = 5\n",
      "02:38:59 [INFO] train episode 1639: winner = -1, steps = 7\n",
      "02:39:03 [INFO] train episode 1640: winner = 1, steps = 8\n",
      "02:39:04 [INFO] train episode 1641: winner = 0, steps = 8\n",
      "02:39:10 [INFO] train episode 1642: winner = 0, steps = 8\n",
      "02:39:12 [INFO] train episode 1643: winner = 0, steps = 8\n",
      "02:39:15 [INFO] train episode 1644: winner = 1, steps = 6\n",
      "02:39:19 [INFO] train episode 1645: winner = -1, steps = 5\n",
      "02:39:21 [INFO] train episode 1646: winner = 1, steps = 4\n",
      "02:39:21 [INFO] train episode 1647: winner = 1, steps = 6\n",
      "02:39:24 [INFO] train episode 1648: winner = 1, steps = 6\n",
      "02:39:26 [INFO] train episode 1649: winner = -1, steps = 7\n",
      "02:39:27 [INFO] train episode 1650: winner = 1, steps = 4\n",
      "02:39:28 [INFO] train episode 1651: winner = 1, steps = 6\n",
      "02:39:28 [INFO] train episode 1652: winner = 1, steps = 6\n",
      "02:39:29 [INFO] train episode 1653: winner = 1, steps = 4\n",
      "02:39:30 [INFO] train episode 1654: winner = 1, steps = 4\n",
      "02:39:30 [INFO] train episode 1655: winner = 1, steps = 4\n",
      "02:39:30 [INFO] train episode 1656: winner = 1, steps = 6\n",
      "02:39:30 [INFO] train episode 1657: winner = 0, steps = 8\n",
      "02:39:31 [INFO] train episode 1658: winner = 1, steps = 6\n",
      "02:39:32 [INFO] train episode 1659: winner = 1, steps = 4\n",
      "02:39:33 [INFO] train episode 1660: winner = 1, steps = 6\n",
      "02:39:37 [INFO] train episode 1661: winner = -1, steps = 7\n",
      "02:39:39 [INFO] train episode 1662: winner = 1, steps = 6\n",
      "02:39:40 [INFO] train episode 1663: winner = 0, steps = 8\n",
      "02:39:41 [INFO] train episode 1664: winner = -1, steps = 7\n",
      "02:39:41 [INFO] train episode 1665: winner = 1, steps = 4\n",
      "02:39:41 [INFO] train episode 1666: winner = 1, steps = 6\n",
      "02:39:48 [INFO] train episode 1667: winner = 1, steps = 8\n",
      "02:39:50 [INFO] train episode 1668: winner = 1, steps = 6\n",
      "02:39:50 [INFO] train episode 1669: winner = 1, steps = 4\n",
      "02:39:52 [INFO] train episode 1670: winner = 1, steps = 8\n",
      "02:39:54 [INFO] train episode 1671: winner = 1, steps = 6\n",
      "02:39:54 [INFO] train episode 1672: winner = 1, steps = 4\n",
      "02:39:56 [INFO] train episode 1673: winner = -1, steps = 5\n",
      "02:39:57 [INFO] train episode 1674: winner = -1, steps = 7\n",
      "02:39:58 [INFO] train episode 1675: winner = 0, steps = 8\n",
      "02:40:01 [INFO] train episode 1676: winner = 0, steps = 8\n",
      "02:40:05 [INFO] train episode 1677: winner = 1, steps = 6\n",
      "02:40:08 [INFO] train episode 1678: winner = 1, steps = 6\n",
      "02:40:09 [INFO] train episode 1679: winner = 1, steps = 4\n",
      "02:40:09 [INFO] train episode 1680: winner = -1, steps = 7\n",
      "02:40:11 [INFO] train episode 1681: winner = 0, steps = 8\n",
      "02:40:13 [INFO] train episode 1682: winner = -1, steps = 7\n",
      "02:40:15 [INFO] train episode 1683: winner = 1, steps = 6\n",
      "02:40:15 [INFO] test episode 1683:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:40:39 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "02:41:02 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "++o\n",
      "x++\n",
      "02:41:19 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "++o\n",
      "x++\n",
      "02:41:22 [INFO] step 3：player -1, action (2, 2)\n",
      "++o\n",
      "++o\n",
      "x+x\n",
      "02:41:23 [INFO] step 4：player 1, action (0, 0)\n",
      "o+o\n",
      "++o\n",
      "x+x\n",
      "02:41:23 [INFO] step 5：player -1, action (2, 1)\n",
      "o+o\n",
      "++o\n",
      "xxx\n",
      "02:41:23 [INFO] test episode 1683: winner = -1, steps = 5\n",
      "02:42:04 [INFO] train episode 1684: winner = 1, steps = 8\n",
      "02:42:45 [INFO] train episode 1685: winner = -1, steps = 5\n",
      "02:43:25 [INFO] train episode 1686: winner = 1, steps = 6\n",
      "02:43:36 [INFO] train episode 1687: winner = 1, steps = 4\n",
      "02:44:08 [INFO] train episode 1688: winner = 0, steps = 8\n",
      "02:44:10 [INFO] train episode 1689: winner = 0, steps = 8\n",
      "02:44:28 [INFO] train episode 1690: winner = -1, steps = 7\n",
      "02:44:49 [INFO] train episode 1691: winner = 0, steps = 8\n",
      "02:44:59 [INFO] train episode 1692: winner = -1, steps = 5\n",
      "02:45:15 [INFO] train episode 1693: winner = 0, steps = 8\n",
      "02:45:49 [INFO] train episode 1694: winner = 1, steps = 6\n",
      "02:45:56 [INFO] train episode 1695: winner = 1, steps = 6\n",
      "02:46:01 [INFO] train episode 1696: winner = 1, steps = 4\n",
      "02:46:09 [INFO] train episode 1697: winner = 1, steps = 8\n",
      "02:46:28 [INFO] train episode 1698: winner = 1, steps = 4\n",
      "02:46:37 [INFO] train episode 1699: winner = 1, steps = 6\n",
      "02:46:48 [INFO] train episode 1700: winner = 1, steps = 6\n",
      "02:46:58 [INFO] train episode 1701: winner = 1, steps = 6\n",
      "02:47:02 [INFO] train episode 1702: winner = 0, steps = 8\n",
      "02:47:02 [INFO] train episode 1703: winner = 1, steps = 6\n",
      "02:47:04 [INFO] train episode 1704: winner = -1, steps = 5\n",
      "02:47:17 [INFO] train episode 1705: winner = 0, steps = 8\n",
      "02:47:18 [INFO] train episode 1706: winner = 0, steps = 8\n",
      "02:47:25 [INFO] train episode 1707: winner = 1, steps = 6\n",
      "02:47:26 [INFO] train episode 1708: winner = 0, steps = 8\n",
      "02:47:31 [INFO] train episode 1709: winner = 0, steps = 8\n",
      "02:47:43 [INFO] train episode 1710: winner = -1, steps = 5\n",
      "02:47:56 [INFO] train episode 1711: winner = 1, steps = 4\n",
      "02:48:03 [INFO] train episode 1712: winner = 0, steps = 8\n",
      "02:48:05 [INFO] train episode 1713: winner = 0, steps = 8\n",
      "02:48:10 [INFO] train episode 1714: winner = -1, steps = 7\n",
      "02:48:15 [INFO] train episode 1715: winner = 1, steps = 4\n",
      "02:48:17 [INFO] train episode 1716: winner = -1, steps = 5\n",
      "02:48:18 [INFO] train episode 1717: winner = 1, steps = 6\n",
      "02:48:26 [INFO] train episode 1718: winner = 0, steps = 8\n",
      "02:48:34 [INFO] train episode 1719: winner = 0, steps = 8\n",
      "02:48:40 [INFO] train episode 1720: winner = 1, steps = 4\n",
      "02:48:47 [INFO] train episode 1721: winner = 1, steps = 6\n",
      "02:48:57 [INFO] train episode 1722: winner = 1, steps = 8\n",
      "02:49:04 [INFO] train episode 1723: winner = 0, steps = 8\n",
      "02:49:05 [INFO] train episode 1724: winner = 1, steps = 4\n",
      "02:49:11 [INFO] train episode 1725: winner = 1, steps = 8\n",
      "02:49:14 [INFO] train episode 1726: winner = 0, steps = 8\n",
      "02:49:24 [INFO] train episode 1727: winner = 0, steps = 8\n",
      "02:49:28 [INFO] train episode 1728: winner = 1, steps = 6\n",
      "02:49:33 [INFO] train episode 1729: winner = 0, steps = 8\n",
      "02:49:36 [INFO] train episode 1730: winner = 0, steps = 8\n",
      "02:49:39 [INFO] train episode 1731: winner = 0, steps = 8\n",
      "02:49:41 [INFO] train episode 1732: winner = 0, steps = 8\n",
      "02:49:45 [INFO] train episode 1733: winner = 0, steps = 8\n",
      "02:49:47 [INFO] train episode 1734: winner = 1, steps = 6\n",
      "02:49:47 [INFO] train episode 1735: winner = -1, steps = 7\n",
      "02:49:47 [INFO] train episode 1736: winner = 1, steps = 4\n",
      "02:49:50 [INFO] train episode 1737: winner = 1, steps = 6\n",
      "02:49:54 [INFO] train episode 1738: winner = 1, steps = 6\n",
      "02:49:59 [INFO] train episode 1739: winner = -1, steps = 7\n",
      "02:50:02 [INFO] train episode 1740: winner = 1, steps = 6\n",
      "02:50:06 [INFO] train episode 1741: winner = 1, steps = 6\n",
      "02:50:09 [INFO] train episode 1742: winner = 1, steps = 6\n",
      "02:50:09 [INFO] train episode 1743: winner = 1, steps = 6\n",
      "02:50:16 [INFO] train episode 1744: winner = 1, steps = 6\n",
      "02:50:20 [INFO] train episode 1745: winner = 1, steps = 6\n",
      "02:50:20 [INFO] train episode 1746: winner = 0, steps = 8\n",
      "02:50:25 [INFO] train episode 1747: winner = 0, steps = 8\n",
      "02:50:27 [INFO] train episode 1748: winner = 1, steps = 4\n",
      "02:50:28 [INFO] train episode 1749: winner = -1, steps = 7\n",
      "02:50:29 [INFO] train episode 1750: winner = -1, steps = 7\n",
      "02:50:33 [INFO] train episode 1751: winner = 1, steps = 6\n",
      "02:50:33 [INFO] train episode 1752: winner = 1, steps = 6\n",
      "02:50:33 [INFO] train episode 1753: winner = 1, steps = 4\n",
      "02:50:33 [INFO] train episode 1754: winner = 1, steps = 6\n",
      "02:50:37 [INFO] train episode 1755: winner = 0, steps = 8\n",
      "02:50:38 [INFO] train episode 1756: winner = 0, steps = 8\n",
      "02:50:40 [INFO] train episode 1757: winner = 0, steps = 8\n",
      "02:50:40 [INFO] train episode 1758: winner = -1, steps = 5\n",
      "02:50:43 [INFO] train episode 1759: winner = 1, steps = 8\n",
      "02:50:44 [INFO] train episode 1760: winner = 0, steps = 8\n",
      "02:50:47 [INFO] train episode 1761: winner = -1, steps = 5\n",
      "02:50:51 [INFO] train episode 1762: winner = 1, steps = 4\n",
      "02:50:53 [INFO] train episode 1763: winner = -1, steps = 5\n",
      "02:50:55 [INFO] train episode 1764: winner = 0, steps = 8\n",
      "02:50:57 [INFO] train episode 1765: winner = -1, steps = 7\n",
      "02:50:58 [INFO] train episode 1766: winner = 1, steps = 4\n",
      "02:50:59 [INFO] train episode 1767: winner = 0, steps = 8\n",
      "02:51:03 [INFO] train episode 1768: winner = 1, steps = 8\n",
      "02:51:05 [INFO] train episode 1769: winner = 1, steps = 6\n",
      "02:51:07 [INFO] train episode 1770: winner = 1, steps = 6\n",
      "02:51:08 [INFO] train episode 1771: winner = -1, steps = 5\n",
      "02:51:09 [INFO] train episode 1772: winner = 1, steps = 6\n",
      "02:51:10 [INFO] train episode 1773: winner = 1, steps = 6\n",
      "02:51:10 [INFO] train episode 1774: winner = -1, steps = 7\n",
      "02:51:11 [INFO] train episode 1775: winner = 0, steps = 8\n",
      "02:51:15 [INFO] train episode 1776: winner = 1, steps = 4\n",
      "02:51:16 [INFO] train episode 1777: winner = 0, steps = 8\n",
      "02:51:17 [INFO] train episode 1778: winner = 1, steps = 6\n",
      "02:51:18 [INFO] train episode 1779: winner = 0, steps = 8\n",
      "02:51:21 [INFO] train episode 1780: winner = 0, steps = 8\n",
      "02:51:24 [INFO] train episode 1781: winner = 1, steps = 4\n",
      "02:51:26 [INFO] train episode 1782: winner = 1, steps = 4\n",
      "02:51:26 [INFO] train episode 1783: winner = 1, steps = 6\n",
      "02:51:27 [INFO] train episode 1784: winner = 0, steps = 8\n",
      "02:51:27 [INFO] train episode 1785: winner = 1, steps = 6\n",
      "02:51:28 [INFO] train episode 1786: winner = 1, steps = 4\n",
      "02:51:28 [INFO] train episode 1787: winner = 1, steps = 4\n",
      "02:51:30 [INFO] train episode 1788: winner = 0, steps = 8\n",
      "02:51:32 [INFO] train episode 1789: winner = 1, steps = 4\n",
      "02:51:33 [INFO] train episode 1790: winner = -1, steps = 7\n",
      "02:51:35 [INFO] train episode 1791: winner = -1, steps = 7\n",
      "02:51:36 [INFO] train episode 1792: winner = 1, steps = 6\n",
      "02:51:40 [INFO] train episode 1793: winner = 1, steps = 6\n",
      "02:51:40 [INFO] train episode 1794: winner = 1, steps = 6\n",
      "02:51:42 [INFO] train episode 1795: winner = -1, steps = 5\n",
      "02:51:43 [INFO] train episode 1796: winner = 1, steps = 6\n",
      "02:51:43 [INFO] train episode 1797: winner = 1, steps = 6\n",
      "02:51:45 [INFO] train episode 1798: winner = -1, steps = 5\n",
      "02:51:46 [INFO] train episode 1799: winner = 1, steps = 6\n",
      "02:51:46 [INFO] train episode 1800: winner = 1, steps = 6\n",
      "02:51:47 [INFO] train episode 1801: winner = 1, steps = 4\n",
      "02:51:48 [INFO] train episode 1802: winner = 1, steps = 6\n",
      "02:51:48 [INFO] train episode 1803: winner = 0, steps = 8\n",
      "02:51:48 [INFO] train episode 1804: winner = 0, steps = 8\n",
      "02:51:49 [INFO] train episode 1805: winner = 1, steps = 6\n",
      "02:51:52 [INFO] train episode 1806: winner = 0, steps = 8\n",
      "02:51:53 [INFO] train episode 1807: winner = 0, steps = 8\n",
      "02:51:55 [INFO] train episode 1808: winner = 0, steps = 8\n",
      "02:51:57 [INFO] train episode 1809: winner = 1, steps = 6\n",
      "02:51:58 [INFO] train episode 1810: winner = 1, steps = 4\n",
      "02:51:58 [INFO] train episode 1811: winner = 0, steps = 8\n",
      "02:52:00 [INFO] train episode 1812: winner = 1, steps = 8\n",
      "02:52:02 [INFO] train episode 1813: winner = 1, steps = 6\n",
      "02:52:02 [INFO] train episode 1814: winner = 1, steps = 6\n",
      "02:52:02 [INFO] train episode 1815: winner = 1, steps = 4\n",
      "02:52:03 [INFO] train episode 1816: winner = 1, steps = 4\n",
      "02:52:04 [INFO] train episode 1817: winner = -1, steps = 5\n",
      "02:52:05 [INFO] train episode 1818: winner = 1, steps = 6\n",
      "02:52:07 [INFO] train episode 1819: winner = -1, steps = 5\n",
      "02:52:07 [INFO] test episode 1819:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "02:52:32 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "02:52:52 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "++o\n",
      "++x\n",
      "02:53:08 [INFO] step 2：player 1, action (1, 0)\n",
      "+++\n",
      "o+o\n",
      "++x\n",
      "02:53:10 [INFO] step 3：player -1, action (1, 1)\n",
      "+++\n",
      "oxo\n",
      "++x\n",
      "02:53:11 [INFO] step 4：player 1, action (0, 0)\n",
      "o++\n",
      "oxo\n",
      "++x\n",
      "02:53:11 [INFO] step 5：player -1, action (2, 0)\n",
      "o++\n",
      "oxo\n",
      "x+x\n",
      "02:53:11 [INFO] step 6：player 1, action (0, 2)\n",
      "o+o\n",
      "oxo\n",
      "x+x\n",
      "02:53:11 [INFO] step 7：player -1, action (2, 1)\n",
      "o+o\n",
      "oxo\n",
      "xxx\n",
      "02:53:11 [INFO] test episode 1819: winner = -1, steps = 7\n",
      "02:53:53 [INFO] train episode 1820: winner = 1, steps = 6\n",
      "02:54:30 [INFO] train episode 1821: winner = 1, steps = 4\n",
      "02:54:55 [INFO] train episode 1822: winner = 1, steps = 6\n",
      "02:55:11 [INFO] train episode 1823: winner = -1, steps = 5\n",
      "02:55:15 [INFO] train episode 1824: winner = 1, steps = 6\n",
      "02:55:31 [INFO] train episode 1825: winner = 0, steps = 8\n",
      "02:56:05 [INFO] train episode 1826: winner = 1, steps = 4\n",
      "02:56:29 [INFO] train episode 1827: winner = 1, steps = 4\n",
      "02:56:40 [INFO] train episode 1828: winner = -1, steps = 5\n",
      "02:56:53 [INFO] train episode 1829: winner = -1, steps = 7\n",
      "02:57:14 [INFO] train episode 1830: winner = 1, steps = 4\n",
      "02:57:22 [INFO] train episode 1831: winner = 1, steps = 4\n",
      "02:57:48 [INFO] train episode 1832: winner = 0, steps = 8\n",
      "02:57:53 [INFO] train episode 1833: winner = 1, steps = 4\n",
      "02:58:10 [INFO] train episode 1834: winner = 1, steps = 4\n",
      "02:58:10 [INFO] train episode 1835: winner = 1, steps = 6\n",
      "02:58:25 [INFO] train episode 1836: winner = 1, steps = 6\n",
      "02:58:38 [INFO] train episode 1837: winner = 0, steps = 8\n",
      "02:58:40 [INFO] train episode 1838: winner = 0, steps = 8\n",
      "02:58:54 [INFO] train episode 1839: winner = -1, steps = 5\n",
      "02:59:05 [INFO] train episode 1840: winner = 0, steps = 8\n",
      "02:59:09 [INFO] train episode 1841: winner = 1, steps = 4\n",
      "02:59:10 [INFO] train episode 1842: winner = 1, steps = 4\n",
      "02:59:18 [INFO] train episode 1843: winner = 1, steps = 8\n",
      "02:59:19 [INFO] train episode 1844: winner = -1, steps = 5\n",
      "02:59:32 [INFO] train episode 1845: winner = 1, steps = 6\n",
      "02:59:35 [INFO] train episode 1846: winner = 1, steps = 4\n",
      "02:59:37 [INFO] train episode 1847: winner = 1, steps = 4\n",
      "02:59:42 [INFO] train episode 1848: winner = 1, steps = 6\n",
      "02:59:45 [INFO] train episode 1849: winner = 1, steps = 6\n",
      "02:59:53 [INFO] train episode 1850: winner = 1, steps = 6\n",
      "02:59:57 [INFO] train episode 1851: winner = 0, steps = 8\n",
      "03:00:00 [INFO] train episode 1852: winner = 1, steps = 6\n",
      "03:00:00 [INFO] train episode 1853: winner = 1, steps = 6\n",
      "03:00:03 [INFO] train episode 1854: winner = 1, steps = 6\n",
      "03:00:06 [INFO] train episode 1855: winner = -1, steps = 5\n",
      "03:00:10 [INFO] train episode 1856: winner = 1, steps = 4\n",
      "03:00:12 [INFO] train episode 1857: winner = 1, steps = 6\n",
      "03:00:14 [INFO] train episode 1858: winner = -1, steps = 5\n",
      "03:00:16 [INFO] train episode 1859: winner = 0, steps = 8\n",
      "03:00:16 [INFO] train episode 1860: winner = 1, steps = 6\n",
      "03:00:17 [INFO] train episode 1861: winner = 1, steps = 4\n",
      "03:00:22 [INFO] train episode 1862: winner = 1, steps = 4\n",
      "03:00:24 [INFO] train episode 1863: winner = 1, steps = 8\n",
      "03:00:27 [INFO] train episode 1864: winner = 1, steps = 4\n",
      "03:00:29 [INFO] train episode 1865: winner = 0, steps = 8\n",
      "03:00:35 [INFO] train episode 1866: winner = -1, steps = 5\n",
      "03:00:38 [INFO] train episode 1867: winner = 1, steps = 4\n",
      "03:00:39 [INFO] train episode 1868: winner = 1, steps = 6\n",
      "03:00:39 [INFO] train episode 1869: winner = 1, steps = 4\n",
      "03:00:47 [INFO] train episode 1870: winner = 1, steps = 8\n",
      "03:00:50 [INFO] train episode 1871: winner = 1, steps = 4\n",
      "03:00:54 [INFO] train episode 1872: winner = 1, steps = 6\n",
      "03:00:56 [INFO] train episode 1873: winner = 0, steps = 8\n",
      "03:00:59 [INFO] train episode 1874: winner = 1, steps = 4\n",
      "03:01:01 [INFO] train episode 1875: winner = 1, steps = 6\n",
      "03:01:04 [INFO] train episode 1876: winner = 0, steps = 8\n",
      "03:01:07 [INFO] train episode 1877: winner = 1, steps = 4\n",
      "03:01:17 [INFO] train episode 1878: winner = 1, steps = 6\n",
      "03:01:17 [INFO] train episode 1879: winner = 1, steps = 4\n",
      "03:01:20 [INFO] train episode 1880: winner = 1, steps = 4\n",
      "03:01:22 [INFO] train episode 1881: winner = -1, steps = 7\n",
      "03:01:26 [INFO] train episode 1882: winner = 1, steps = 6\n",
      "03:01:28 [INFO] train episode 1883: winner = 0, steps = 8\n",
      "03:01:28 [INFO] train episode 1884: winner = -1, steps = 7\n",
      "03:01:34 [INFO] train episode 1885: winner = 0, steps = 8\n",
      "03:01:37 [INFO] train episode 1886: winner = 1, steps = 6\n",
      "03:01:39 [INFO] train episode 1887: winner = 1, steps = 6\n",
      "03:01:42 [INFO] train episode 1888: winner = 1, steps = 4\n",
      "03:01:43 [INFO] train episode 1889: winner = 0, steps = 8\n",
      "03:01:48 [INFO] train episode 1890: winner = 1, steps = 6\n",
      "03:01:49 [INFO] train episode 1891: winner = 1, steps = 6\n",
      "03:01:51 [INFO] train episode 1892: winner = 1, steps = 6\n",
      "03:01:54 [INFO] train episode 1893: winner = 1, steps = 6\n",
      "03:01:56 [INFO] train episode 1894: winner = 0, steps = 8\n",
      "03:02:01 [INFO] train episode 1895: winner = 0, steps = 8\n",
      "03:02:01 [INFO] train episode 1896: winner = 1, steps = 4\n",
      "03:02:02 [INFO] train episode 1897: winner = 1, steps = 6\n",
      "03:02:06 [INFO] train episode 1898: winner = 0, steps = 8\n",
      "03:02:09 [INFO] train episode 1899: winner = 1, steps = 6\n",
      "03:02:09 [INFO] train episode 1900: winner = 1, steps = 4\n",
      "03:02:11 [INFO] train episode 1901: winner = 1, steps = 4\n",
      "03:02:13 [INFO] train episode 1902: winner = 1, steps = 6\n",
      "03:02:13 [INFO] train episode 1903: winner = -1, steps = 7\n",
      "03:02:15 [INFO] train episode 1904: winner = 0, steps = 8\n",
      "03:02:15 [INFO] train episode 1905: winner = 1, steps = 6\n",
      "03:02:16 [INFO] train episode 1906: winner = 1, steps = 4\n",
      "03:02:16 [INFO] train episode 1907: winner = 1, steps = 4\n",
      "03:02:16 [INFO] train episode 1908: winner = 1, steps = 4\n",
      "03:02:16 [INFO] train episode 1909: winner = 1, steps = 4\n",
      "03:02:16 [INFO] train episode 1910: winner = 1, steps = 4\n",
      "03:02:18 [INFO] train episode 1911: winner = 1, steps = 8\n",
      "03:02:18 [INFO] train episode 1912: winner = -1, steps = 5\n",
      "03:02:18 [INFO] train episode 1913: winner = 1, steps = 4\n",
      "03:02:20 [INFO] train episode 1914: winner = 0, steps = 8\n",
      "03:02:23 [INFO] train episode 1915: winner = 1, steps = 6\n",
      "03:02:30 [INFO] train episode 1916: winner = 0, steps = 8\n",
      "03:02:30 [INFO] train episode 1917: winner = 1, steps = 4\n",
      "03:02:32 [INFO] train episode 1918: winner = 0, steps = 8\n",
      "03:02:33 [INFO] train episode 1919: winner = 1, steps = 4\n",
      "03:02:34 [INFO] train episode 1920: winner = 1, steps = 6\n",
      "03:02:35 [INFO] train episode 1921: winner = 0, steps = 8\n",
      "03:02:37 [INFO] train episode 1922: winner = 1, steps = 6\n",
      "03:02:38 [INFO] train episode 1923: winner = 1, steps = 6\n",
      "03:02:38 [INFO] train episode 1924: winner = 0, steps = 8\n",
      "03:02:42 [INFO] train episode 1925: winner = 0, steps = 8\n",
      "03:02:42 [INFO] train episode 1926: winner = 1, steps = 6\n",
      "03:02:44 [INFO] train episode 1927: winner = 0, steps = 8\n",
      "03:02:45 [INFO] train episode 1928: winner = 1, steps = 6\n",
      "03:02:46 [INFO] train episode 1929: winner = 0, steps = 8\n",
      "03:02:46 [INFO] train episode 1930: winner = 0, steps = 8\n",
      "03:02:47 [INFO] train episode 1931: winner = 1, steps = 6\n",
      "03:02:48 [INFO] train episode 1932: winner = 0, steps = 8\n",
      "03:02:50 [INFO] train episode 1933: winner = 0, steps = 8\n",
      "03:02:53 [INFO] train episode 1934: winner = 1, steps = 6\n",
      "03:02:54 [INFO] train episode 1935: winner = 1, steps = 6\n",
      "03:02:57 [INFO] train episode 1936: winner = 1, steps = 6\n",
      "03:02:57 [INFO] train episode 1937: winner = 1, steps = 6\n",
      "03:02:58 [INFO] train episode 1938: winner = -1, steps = 7\n",
      "03:02:59 [INFO] train episode 1939: winner = -1, steps = 5\n",
      "03:02:59 [INFO] train episode 1940: winner = 1, steps = 6\n",
      "03:03:00 [INFO] train episode 1941: winner = 1, steps = 6\n",
      "03:03:02 [INFO] train episode 1942: winner = -1, steps = 7\n",
      "03:03:03 [INFO] train episode 1943: winner = 0, steps = 8\n",
      "03:03:07 [INFO] train episode 1944: winner = 1, steps = 6\n",
      "03:03:07 [INFO] train episode 1945: winner = 1, steps = 4\n",
      "03:03:09 [INFO] train episode 1946: winner = 0, steps = 8\n",
      "03:03:09 [INFO] train episode 1947: winner = 1, steps = 6\n",
      "03:03:11 [INFO] train episode 1948: winner = 0, steps = 8\n",
      "03:03:12 [INFO] train episode 1949: winner = 0, steps = 8\n",
      "03:03:15 [INFO] train episode 1950: winner = 1, steps = 6\n",
      "03:03:16 [INFO] train episode 1951: winner = 1, steps = 6\n",
      "03:03:19 [INFO] train episode 1952: winner = 0, steps = 8\n",
      "03:03:19 [INFO] train episode 1953: winner = 1, steps = 4\n",
      "03:03:19 [INFO] train episode 1954: winner = 1, steps = 4\n",
      "03:03:19 [INFO] train episode 1955: winner = 1, steps = 6\n",
      "03:03:21 [INFO] train episode 1956: winner = 1, steps = 8\n",
      "03:03:22 [INFO] train episode 1957: winner = -1, steps = 7\n",
      "03:03:23 [INFO] train episode 1958: winner = 0, steps = 8\n",
      "03:03:23 [INFO] train episode 1959: winner = 0, steps = 8\n",
      "03:03:23 [INFO] train episode 1960: winner = 1, steps = 4\n",
      "03:03:24 [INFO] train episode 1961: winner = 0, steps = 8\n",
      "03:03:25 [INFO] train episode 1962: winner = 1, steps = 6\n",
      "03:03:25 [INFO] test episode 1962:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:03:48 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "03:04:09 [INFO] step 1：player -1, action (1, 2)\n",
      "+++\n",
      "++x\n",
      "+o+\n",
      "03:04:22 [INFO] step 2：player 1, action (2, 0)\n",
      "+++\n",
      "++x\n",
      "oo+\n",
      "03:04:24 [INFO] step 3：player -1, action (2, 2)\n",
      "+++\n",
      "++x\n",
      "oox\n",
      "03:04:24 [INFO] step 4：player 1, action (0, 2)\n",
      "++o\n",
      "++x\n",
      "oox\n",
      "03:04:24 [INFO] step 5：player -1, action (1, 1)\n",
      "++o\n",
      "+xx\n",
      "oox\n",
      "03:04:24 [INFO] step 6：player 1, action (0, 1)\n",
      "+oo\n",
      "+xx\n",
      "oox\n",
      "03:04:25 [INFO] step 7：player -1, action (0, 0)\n",
      "xoo\n",
      "+xx\n",
      "oox\n",
      "03:04:25 [INFO] test episode 1962: winner = -1, steps = 7\n",
      "03:05:03 [INFO] train episode 1963: winner = 1, steps = 6\n",
      "03:05:37 [INFO] train episode 1964: winner = 1, steps = 4\n",
      "03:06:10 [INFO] train episode 1965: winner = 0, steps = 8\n",
      "03:06:39 [INFO] train episode 1966: winner = 0, steps = 8\n",
      "03:06:55 [INFO] train episode 1967: winner = 0, steps = 8\n",
      "03:06:58 [INFO] train episode 1968: winner = -1, steps = 7\n",
      "03:07:02 [INFO] train episode 1969: winner = 1, steps = 8\n",
      "03:07:19 [INFO] train episode 1970: winner = 0, steps = 8\n",
      "03:07:33 [INFO] train episode 1971: winner = 1, steps = 4\n",
      "03:07:50 [INFO] train episode 1972: winner = -1, steps = 5\n",
      "03:08:18 [INFO] train episode 1973: winner = -1, steps = 7\n",
      "03:08:34 [INFO] train episode 1974: winner = -1, steps = 7\n",
      "03:08:49 [INFO] train episode 1975: winner = 0, steps = 8\n",
      "03:08:59 [INFO] train episode 1976: winner = 1, steps = 6\n",
      "03:09:07 [INFO] train episode 1977: winner = 0, steps = 8\n",
      "03:09:27 [INFO] train episode 1978: winner = 1, steps = 6\n",
      "03:09:43 [INFO] train episode 1979: winner = 0, steps = 8\n",
      "03:09:52 [INFO] train episode 1980: winner = 1, steps = 6\n",
      "03:09:58 [INFO] train episode 1981: winner = 1, steps = 6\n",
      "03:10:03 [INFO] train episode 1982: winner = -1, steps = 5\n",
      "03:10:22 [INFO] train episode 1983: winner = 1, steps = 6\n",
      "03:10:30 [INFO] train episode 1984: winner = 1, steps = 6\n",
      "03:10:32 [INFO] train episode 1985: winner = 1, steps = 8\n",
      "03:10:48 [INFO] train episode 1986: winner = -1, steps = 7\n",
      "03:10:51 [INFO] train episode 1987: winner = 1, steps = 4\n",
      "03:10:54 [INFO] train episode 1988: winner = -1, steps = 5\n",
      "03:10:56 [INFO] train episode 1989: winner = 0, steps = 8\n",
      "03:11:03 [INFO] train episode 1990: winner = 1, steps = 4\n",
      "03:11:04 [INFO] train episode 1991: winner = 1, steps = 4\n",
      "03:11:19 [INFO] train episode 1992: winner = 0, steps = 8\n",
      "03:11:23 [INFO] train episode 1993: winner = 1, steps = 6\n",
      "03:11:28 [INFO] train episode 1994: winner = 1, steps = 6\n",
      "03:11:33 [INFO] train episode 1995: winner = 1, steps = 6\n",
      "03:11:36 [INFO] train episode 1996: winner = 1, steps = 6\n",
      "03:11:40 [INFO] train episode 1997: winner = -1, steps = 7\n",
      "03:11:44 [INFO] train episode 1998: winner = 0, steps = 8\n",
      "03:11:45 [INFO] train episode 1999: winner = 1, steps = 8\n",
      "03:11:47 [INFO] train episode 2000: winner = 1, steps = 4\n",
      "03:11:48 [INFO] train episode 2001: winner = 1, steps = 6\n",
      "03:11:54 [INFO] train episode 2002: winner = 1, steps = 4\n",
      "03:12:00 [INFO] train episode 2003: winner = 1, steps = 4\n",
      "03:12:09 [INFO] train episode 2004: winner = 1, steps = 6\n",
      "03:12:10 [INFO] train episode 2005: winner = -1, steps = 5\n",
      "03:12:13 [INFO] train episode 2006: winner = 0, steps = 8\n",
      "03:12:15 [INFO] train episode 2007: winner = 1, steps = 6\n",
      "03:12:15 [INFO] train episode 2008: winner = 1, steps = 4\n",
      "03:12:17 [INFO] train episode 2009: winner = 1, steps = 6\n",
      "03:12:23 [INFO] train episode 2010: winner = 0, steps = 8\n",
      "03:12:24 [INFO] train episode 2011: winner = 1, steps = 6\n",
      "03:12:29 [INFO] train episode 2012: winner = 0, steps = 8\n",
      "03:12:31 [INFO] train episode 2013: winner = 1, steps = 4\n",
      "03:12:33 [INFO] train episode 2014: winner = -1, steps = 5\n",
      "03:12:34 [INFO] train episode 2015: winner = -1, steps = 7\n",
      "03:12:34 [INFO] train episode 2016: winner = 1, steps = 8\n",
      "03:12:37 [INFO] train episode 2017: winner = 1, steps = 4\n",
      "03:12:43 [INFO] train episode 2018: winner = 1, steps = 4\n",
      "03:12:47 [INFO] train episode 2019: winner = 0, steps = 8\n",
      "03:12:48 [INFO] train episode 2020: winner = 1, steps = 4\n",
      "03:12:50 [INFO] train episode 2021: winner = 0, steps = 8\n",
      "03:13:00 [INFO] train episode 2022: winner = 1, steps = 6\n",
      "03:13:00 [INFO] train episode 2023: winner = 1, steps = 6\n",
      "03:13:05 [INFO] train episode 2024: winner = 1, steps = 6\n",
      "03:13:06 [INFO] train episode 2025: winner = 1, steps = 6\n",
      "03:13:06 [INFO] train episode 2026: winner = 0, steps = 8\n",
      "03:13:06 [INFO] train episode 2027: winner = -1, steps = 7\n",
      "03:13:06 [INFO] train episode 2028: winner = 1, steps = 4\n",
      "03:13:06 [INFO] train episode 2029: winner = 1, steps = 6\n",
      "03:13:09 [INFO] train episode 2030: winner = 1, steps = 6\n",
      "03:13:12 [INFO] train episode 2031: winner = 1, steps = 6\n",
      "03:13:17 [INFO] train episode 2032: winner = 0, steps = 8\n",
      "03:13:22 [INFO] train episode 2033: winner = 0, steps = 8\n",
      "03:13:26 [INFO] train episode 2034: winner = 0, steps = 8\n",
      "03:13:29 [INFO] train episode 2035: winner = 0, steps = 8\n",
      "03:13:29 [INFO] train episode 2036: winner = 1, steps = 6\n",
      "03:13:31 [INFO] train episode 2037: winner = 1, steps = 4\n",
      "03:13:32 [INFO] train episode 2038: winner = 0, steps = 8\n",
      "03:13:35 [INFO] train episode 2039: winner = 0, steps = 8\n",
      "03:13:36 [INFO] train episode 2040: winner = 1, steps = 4\n",
      "03:13:37 [INFO] train episode 2041: winner = 0, steps = 8\n",
      "03:13:37 [INFO] train episode 2042: winner = 1, steps = 4\n",
      "03:13:40 [INFO] train episode 2043: winner = -1, steps = 5\n",
      "03:13:40 [INFO] train episode 2044: winner = 0, steps = 8\n",
      "03:13:40 [INFO] train episode 2045: winner = 1, steps = 4\n",
      "03:13:41 [INFO] train episode 2046: winner = -1, steps = 7\n",
      "03:13:46 [INFO] train episode 2047: winner = 0, steps = 8\n",
      "03:13:46 [INFO] train episode 2048: winner = 1, steps = 4\n",
      "03:13:49 [INFO] train episode 2049: winner = 0, steps = 8\n",
      "03:13:50 [INFO] train episode 2050: winner = 1, steps = 6\n",
      "03:13:50 [INFO] train episode 2051: winner = 1, steps = 6\n",
      "03:13:50 [INFO] train episode 2052: winner = 0, steps = 8\n",
      "03:13:54 [INFO] train episode 2053: winner = 1, steps = 6\n",
      "03:13:56 [INFO] train episode 2054: winner = 0, steps = 8\n",
      "03:13:56 [INFO] train episode 2055: winner = 0, steps = 8\n",
      "03:13:56 [INFO] train episode 2056: winner = -1, steps = 5\n",
      "03:14:00 [INFO] train episode 2057: winner = 1, steps = 6\n",
      "03:14:00 [INFO] train episode 2058: winner = 1, steps = 6\n",
      "03:14:01 [INFO] train episode 2059: winner = 1, steps = 6\n",
      "03:14:01 [INFO] train episode 2060: winner = 0, steps = 8\n",
      "03:14:02 [INFO] train episode 2061: winner = 0, steps = 8\n",
      "03:14:07 [INFO] train episode 2062: winner = -1, steps = 5\n",
      "03:14:11 [INFO] train episode 2063: winner = -1, steps = 7\n",
      "03:14:12 [INFO] train episode 2064: winner = -1, steps = 7\n",
      "03:14:13 [INFO] train episode 2065: winner = 1, steps = 8\n",
      "03:14:15 [INFO] train episode 2066: winner = 0, steps = 8\n",
      "03:14:18 [INFO] train episode 2067: winner = 1, steps = 4\n",
      "03:14:19 [INFO] train episode 2068: winner = 0, steps = 8\n",
      "03:14:20 [INFO] train episode 2069: winner = 1, steps = 6\n",
      "03:14:22 [INFO] train episode 2070: winner = 1, steps = 4\n",
      "03:14:22 [INFO] train episode 2071: winner = -1, steps = 7\n",
      "03:14:26 [INFO] train episode 2072: winner = -1, steps = 5\n",
      "03:14:28 [INFO] train episode 2073: winner = 0, steps = 8\n",
      "03:14:29 [INFO] train episode 2074: winner = 1, steps = 4\n",
      "03:14:32 [INFO] train episode 2075: winner = 0, steps = 8\n",
      "03:14:33 [INFO] train episode 2076: winner = 1, steps = 4\n",
      "03:14:38 [INFO] train episode 2077: winner = 0, steps = 8\n",
      "03:14:38 [INFO] train episode 2078: winner = 1, steps = 4\n",
      "03:14:40 [INFO] train episode 2079: winner = 0, steps = 8\n",
      "03:14:41 [INFO] train episode 2080: winner = 1, steps = 6\n",
      "03:14:41 [INFO] train episode 2081: winner = 0, steps = 8\n",
      "03:14:41 [INFO] train episode 2082: winner = 1, steps = 6\n",
      "03:14:42 [INFO] train episode 2083: winner = 1, steps = 4\n",
      "03:14:44 [INFO] train episode 2084: winner = -1, steps = 7\n",
      "03:14:44 [INFO] train episode 2085: winner = 1, steps = 6\n",
      "03:14:45 [INFO] train episode 2086: winner = 0, steps = 8\n",
      "03:14:45 [INFO] train episode 2087: winner = 1, steps = 4\n",
      "03:14:45 [INFO] train episode 2088: winner = 1, steps = 4\n",
      "03:14:45 [INFO] train episode 2089: winner = 0, steps = 8\n",
      "03:14:45 [INFO] train episode 2090: winner = 0, steps = 8\n",
      "03:14:46 [INFO] train episode 2091: winner = 1, steps = 6\n",
      "03:14:46 [INFO] train episode 2092: winner = 1, steps = 4\n",
      "03:14:46 [INFO] train episode 2093: winner = 1, steps = 6\n",
      "03:14:48 [INFO] train episode 2094: winner = -1, steps = 7\n",
      "03:14:49 [INFO] train episode 2095: winner = 1, steps = 6\n",
      "03:14:51 [INFO] train episode 2096: winner = 1, steps = 4\n",
      "03:14:53 [INFO] train episode 2097: winner = 1, steps = 6\n",
      "03:14:56 [INFO] train episode 2098: winner = 0, steps = 8\n",
      "03:14:59 [INFO] train episode 2099: winner = 1, steps = 4\n",
      "03:15:01 [INFO] train episode 2100: winner = 1, steps = 4\n",
      "03:15:01 [INFO] test episode 2100:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:15:24 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "03:15:40 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "+o+\n",
      "++x\n",
      "03:15:55 [INFO] step 2：player 1, action (2, 1)\n",
      "+++\n",
      "+o+\n",
      "+ox\n",
      "03:15:58 [INFO] step 3：player -1, action (2, 0)\n",
      "+++\n",
      "+o+\n",
      "xox\n",
      "03:16:00 [INFO] step 4：player 1, action (0, 1)\n",
      "+o+\n",
      "+o+\n",
      "xox\n",
      "03:16:00 [INFO] test episode 2100: winner = 1, steps = 4\n",
      "03:16:37 [INFO] train episode 2101: winner = 0, steps = 8\n",
      "03:16:40 [INFO] train episode 2102: winner = 0, steps = 8\n",
      "03:17:15 [INFO] train episode 2103: winner = 1, steps = 6\n",
      "03:17:51 [INFO] train episode 2104: winner = 1, steps = 6\n",
      "03:18:38 [INFO] train episode 2105: winner = 1, steps = 6\n",
      "03:18:45 [INFO] train episode 2106: winner = 1, steps = 4\n",
      "03:18:56 [INFO] train episode 2107: winner = 1, steps = 4\n",
      "03:19:18 [INFO] train episode 2108: winner = 1, steps = 4\n",
      "03:19:30 [INFO] train episode 2109: winner = -1, steps = 5\n",
      "03:19:37 [INFO] train episode 2110: winner = 1, steps = 4\n",
      "03:19:50 [INFO] train episode 2111: winner = 0, steps = 8\n",
      "03:20:04 [INFO] train episode 2112: winner = 0, steps = 8\n",
      "03:20:32 [INFO] train episode 2113: winner = 0, steps = 8\n",
      "03:20:37 [INFO] train episode 2114: winner = 1, steps = 6\n",
      "03:20:51 [INFO] train episode 2115: winner = 1, steps = 8\n",
      "03:20:56 [INFO] train episode 2116: winner = 1, steps = 4\n",
      "03:21:02 [INFO] train episode 2117: winner = 1, steps = 6\n",
      "03:21:05 [INFO] train episode 2118: winner = 0, steps = 8\n",
      "03:21:12 [INFO] train episode 2119: winner = 0, steps = 8\n",
      "03:21:15 [INFO] train episode 2120: winner = 1, steps = 6\n",
      "03:21:18 [INFO] train episode 2121: winner = 1, steps = 8\n",
      "03:21:28 [INFO] train episode 2122: winner = -1, steps = 5\n",
      "03:21:29 [INFO] train episode 2123: winner = 1, steps = 6\n",
      "03:21:43 [INFO] train episode 2124: winner = -1, steps = 7\n",
      "03:21:58 [INFO] train episode 2125: winner = 1, steps = 6\n",
      "03:21:59 [INFO] train episode 2126: winner = 1, steps = 6\n",
      "03:22:09 [INFO] train episode 2127: winner = 1, steps = 6\n",
      "03:22:14 [INFO] train episode 2128: winner = 1, steps = 4\n",
      "03:22:22 [INFO] train episode 2129: winner = -1, steps = 5\n",
      "03:22:29 [INFO] train episode 2130: winner = 1, steps = 4\n",
      "03:22:53 [INFO] train episode 2131: winner = 1, steps = 6\n",
      "03:22:54 [INFO] train episode 2132: winner = 0, steps = 8\n",
      "03:22:57 [INFO] train episode 2133: winner = -1, steps = 5\n",
      "03:22:59 [INFO] train episode 2134: winner = 1, steps = 4\n",
      "03:23:09 [INFO] train episode 2135: winner = 1, steps = 4\n",
      "03:23:12 [INFO] train episode 2136: winner = 1, steps = 6\n",
      "03:23:12 [INFO] train episode 2137: winner = 0, steps = 8\n",
      "03:23:18 [INFO] train episode 2138: winner = 1, steps = 6\n",
      "03:23:18 [INFO] train episode 2139: winner = 1, steps = 4\n",
      "03:23:19 [INFO] train episode 2140: winner = 1, steps = 4\n",
      "03:23:22 [INFO] train episode 2141: winner = 0, steps = 8\n",
      "03:23:31 [INFO] train episode 2142: winner = 0, steps = 8\n",
      "03:23:40 [INFO] train episode 2143: winner = 0, steps = 8\n",
      "03:23:42 [INFO] train episode 2144: winner = 0, steps = 8\n",
      "03:23:45 [INFO] train episode 2145: winner = -1, steps = 5\n",
      "03:23:57 [INFO] train episode 2146: winner = 1, steps = 6\n",
      "03:24:00 [INFO] train episode 2147: winner = 0, steps = 8\n",
      "03:24:02 [INFO] train episode 2148: winner = 0, steps = 8\n",
      "03:24:11 [INFO] train episode 2149: winner = 1, steps = 8\n",
      "03:24:12 [INFO] train episode 2150: winner = 1, steps = 4\n",
      "03:24:15 [INFO] train episode 2151: winner = -1, steps = 7\n",
      "03:24:15 [INFO] train episode 2152: winner = 1, steps = 6\n",
      "03:24:28 [INFO] train episode 2153: winner = 0, steps = 8\n",
      "03:24:39 [INFO] train episode 2154: winner = 1, steps = 6\n",
      "03:24:40 [INFO] train episode 2155: winner = -1, steps = 7\n",
      "03:24:40 [INFO] train episode 2156: winner = 1, steps = 4\n",
      "03:24:49 [INFO] train episode 2157: winner = 1, steps = 6\n",
      "03:24:52 [INFO] train episode 2158: winner = 1, steps = 8\n",
      "03:24:57 [INFO] train episode 2159: winner = 1, steps = 6\n",
      "03:25:01 [INFO] train episode 2160: winner = 0, steps = 8\n",
      "03:25:07 [INFO] train episode 2161: winner = 0, steps = 8\n",
      "03:25:07 [INFO] train episode 2162: winner = 1, steps = 6\n",
      "03:25:10 [INFO] train episode 2163: winner = -1, steps = 5\n",
      "03:25:14 [INFO] train episode 2164: winner = 1, steps = 4\n",
      "03:25:16 [INFO] train episode 2165: winner = 0, steps = 8\n",
      "03:25:16 [INFO] train episode 2166: winner = 1, steps = 4\n",
      "03:25:17 [INFO] train episode 2167: winner = 1, steps = 6\n",
      "03:25:19 [INFO] train episode 2168: winner = 1, steps = 6\n",
      "03:25:24 [INFO] train episode 2169: winner = 1, steps = 6\n",
      "03:25:28 [INFO] train episode 2170: winner = 0, steps = 8\n",
      "03:25:28 [INFO] train episode 2171: winner = 0, steps = 8\n",
      "03:25:31 [INFO] train episode 2172: winner = -1, steps = 7\n",
      "03:25:31 [INFO] train episode 2173: winner = 1, steps = 6\n",
      "03:25:31 [INFO] train episode 2174: winner = 1, steps = 4\n",
      "03:25:32 [INFO] train episode 2175: winner = 1, steps = 6\n",
      "03:25:33 [INFO] train episode 2176: winner = 0, steps = 8\n",
      "03:25:35 [INFO] train episode 2177: winner = -1, steps = 5\n",
      "03:25:39 [INFO] train episode 2178: winner = 0, steps = 8\n",
      "03:25:42 [INFO] train episode 2179: winner = 0, steps = 8\n",
      "03:25:43 [INFO] train episode 2180: winner = 1, steps = 4\n",
      "03:25:44 [INFO] train episode 2181: winner = -1, steps = 5\n",
      "03:25:45 [INFO] train episode 2182: winner = 0, steps = 8\n",
      "03:25:49 [INFO] train episode 2183: winner = 1, steps = 6\n",
      "03:25:52 [INFO] train episode 2184: winner = 0, steps = 8\n",
      "03:25:54 [INFO] train episode 2185: winner = 1, steps = 6\n",
      "03:25:59 [INFO] train episode 2186: winner = 0, steps = 8\n",
      "03:26:00 [INFO] train episode 2187: winner = -1, steps = 5\n",
      "03:26:00 [INFO] train episode 2188: winner = 0, steps = 8\n",
      "03:26:05 [INFO] train episode 2189: winner = 0, steps = 8\n",
      "03:26:06 [INFO] train episode 2190: winner = -1, steps = 7\n",
      "03:26:06 [INFO] train episode 2191: winner = 1, steps = 4\n",
      "03:26:07 [INFO] train episode 2192: winner = 0, steps = 8\n",
      "03:26:07 [INFO] train episode 2193: winner = 1, steps = 4\n",
      "03:26:10 [INFO] train episode 2194: winner = 0, steps = 8\n",
      "03:26:11 [INFO] train episode 2195: winner = 0, steps = 8\n",
      "03:26:13 [INFO] train episode 2196: winner = 0, steps = 8\n",
      "03:26:13 [INFO] train episode 2197: winner = 1, steps = 4\n",
      "03:26:15 [INFO] train episode 2198: winner = 1, steps = 4\n",
      "03:26:17 [INFO] train episode 2199: winner = 0, steps = 8\n",
      "03:26:17 [INFO] train episode 2200: winner = 1, steps = 6\n",
      "03:26:18 [INFO] train episode 2201: winner = 0, steps = 8\n",
      "03:26:19 [INFO] train episode 2202: winner = 0, steps = 8\n",
      "03:26:20 [INFO] train episode 2203: winner = 1, steps = 8\n",
      "03:26:21 [INFO] train episode 2204: winner = 0, steps = 8\n",
      "03:26:27 [INFO] train episode 2205: winner = 0, steps = 8\n",
      "03:26:28 [INFO] train episode 2206: winner = 1, steps = 6\n",
      "03:26:30 [INFO] train episode 2207: winner = 0, steps = 8\n",
      "03:26:31 [INFO] train episode 2208: winner = 0, steps = 8\n",
      "03:26:32 [INFO] train episode 2209: winner = 1, steps = 6\n",
      "03:26:34 [INFO] train episode 2210: winner = 0, steps = 8\n",
      "03:26:34 [INFO] train episode 2211: winner = 0, steps = 8\n",
      "03:26:35 [INFO] train episode 2212: winner = -1, steps = 5\n",
      "03:26:36 [INFO] train episode 2213: winner = 1, steps = 8\n",
      "03:26:37 [INFO] train episode 2214: winner = 1, steps = 6\n",
      "03:26:41 [INFO] train episode 2215: winner = 1, steps = 4\n",
      "03:26:42 [INFO] train episode 2216: winner = 1, steps = 8\n",
      "03:26:43 [INFO] train episode 2217: winner = 1, steps = 6\n",
      "03:26:46 [INFO] train episode 2218: winner = 0, steps = 8\n",
      "03:26:46 [INFO] train episode 2219: winner = 1, steps = 6\n",
      "03:26:46 [INFO] train episode 2220: winner = 0, steps = 8\n",
      "03:26:46 [INFO] train episode 2221: winner = 1, steps = 6\n",
      "03:26:46 [INFO] train episode 2222: winner = 1, steps = 6\n",
      "03:26:48 [INFO] train episode 2223: winner = 0, steps = 8\n",
      "03:26:49 [INFO] train episode 2224: winner = 1, steps = 8\n",
      "03:26:51 [INFO] train episode 2225: winner = 0, steps = 8\n",
      "03:26:51 [INFO] train episode 2226: winner = 1, steps = 6\n",
      "03:26:53 [INFO] train episode 2227: winner = 1, steps = 4\n",
      "03:26:53 [INFO] train episode 2228: winner = 1, steps = 4\n",
      "03:26:58 [INFO] train episode 2229: winner = 1, steps = 6\n",
      "03:27:00 [INFO] train episode 2230: winner = 1, steps = 8\n",
      "03:27:00 [INFO] train episode 2231: winner = 0, steps = 8\n",
      "03:27:02 [INFO] train episode 2232: winner = 0, steps = 8\n",
      "03:27:06 [INFO] train episode 2233: winner = 1, steps = 8\n",
      "03:27:10 [INFO] train episode 2234: winner = 1, steps = 8\n",
      "03:27:10 [INFO] test episode 2234:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:27:33 [INFO] step 0：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "o++\n",
      "03:27:51 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "x++\n",
      "o++\n",
      "03:28:08 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "x++\n",
      "o++\n",
      "03:28:10 [INFO] step 3：player -1, action (1, 1)\n",
      "++o\n",
      "xx+\n",
      "o++\n",
      "03:28:10 [INFO] step 4：player 1, action (1, 2)\n",
      "++o\n",
      "xxo\n",
      "o++\n",
      "03:28:10 [INFO] step 5：player -1, action (2, 2)\n",
      "++o\n",
      "xxo\n",
      "o+x\n",
      "03:28:11 [INFO] step 6：player 1, action (0, 0)\n",
      "o+o\n",
      "xxo\n",
      "o+x\n",
      "03:28:11 [INFO] step 7：player -1, action (0, 1)\n",
      "oxo\n",
      "xxo\n",
      "o+x\n",
      "03:28:11 [INFO] step 8：player 1, action (2, 1)\n",
      "oxo\n",
      "xxo\n",
      "oox\n",
      "03:28:11 [INFO] test episode 2234: winner = 0, steps = 8\n",
      "03:28:44 [INFO] train episode 2235: winner = -1, steps = 5\n",
      "03:28:50 [INFO] train episode 2236: winner = 1, steps = 4\n",
      "03:29:18 [INFO] train episode 2237: winner = 1, steps = 4\n",
      "03:29:27 [INFO] train episode 2238: winner = -1, steps = 5\n",
      "03:30:10 [INFO] train episode 2239: winner = 0, steps = 8\n",
      "03:30:25 [INFO] train episode 2240: winner = 0, steps = 8\n",
      "03:30:33 [INFO] train episode 2241: winner = 1, steps = 4\n",
      "03:31:13 [INFO] train episode 2242: winner = 0, steps = 8\n",
      "03:31:33 [INFO] train episode 2243: winner = 1, steps = 4\n",
      "03:31:39 [INFO] train episode 2244: winner = 1, steps = 4\n",
      "03:31:50 [INFO] train episode 2245: winner = 1, steps = 6\n",
      "03:32:17 [INFO] train episode 2246: winner = -1, steps = 7\n",
      "03:32:23 [INFO] train episode 2247: winner = 1, steps = 6\n",
      "03:32:33 [INFO] train episode 2248: winner = 1, steps = 6\n",
      "03:32:53 [INFO] train episode 2249: winner = 0, steps = 8\n",
      "03:33:08 [INFO] train episode 2250: winner = 0, steps = 8\n",
      "03:33:11 [INFO] train episode 2251: winner = 1, steps = 4\n",
      "03:33:17 [INFO] train episode 2252: winner = 1, steps = 6\n",
      "03:33:27 [INFO] train episode 2253: winner = 1, steps = 6\n",
      "03:33:27 [INFO] train episode 2254: winner = 1, steps = 4\n",
      "03:33:31 [INFO] train episode 2255: winner = 1, steps = 4\n",
      "03:33:38 [INFO] train episode 2256: winner = -1, steps = 7\n",
      "03:33:38 [INFO] train episode 2257: winner = 1, steps = 4\n",
      "03:33:40 [INFO] train episode 2258: winner = -1, steps = 7\n",
      "03:33:48 [INFO] train episode 2259: winner = 1, steps = 4\n",
      "03:33:53 [INFO] train episode 2260: winner = 0, steps = 8\n",
      "03:33:55 [INFO] train episode 2261: winner = 1, steps = 6\n",
      "03:34:00 [INFO] train episode 2262: winner = 1, steps = 4\n",
      "03:34:00 [INFO] train episode 2263: winner = 0, steps = 8\n",
      "03:34:01 [INFO] train episode 2264: winner = 1, steps = 6\n",
      "03:34:04 [INFO] train episode 2265: winner = 1, steps = 6\n",
      "03:34:06 [INFO] train episode 2266: winner = 1, steps = 4\n",
      "03:34:10 [INFO] train episode 2267: winner = 0, steps = 8\n",
      "03:34:17 [INFO] train episode 2268: winner = 0, steps = 8\n",
      "03:34:25 [INFO] train episode 2269: winner = 1, steps = 4\n",
      "03:34:34 [INFO] train episode 2270: winner = 1, steps = 6\n",
      "03:34:39 [INFO] train episode 2271: winner = 1, steps = 6\n",
      "03:34:41 [INFO] train episode 2272: winner = 1, steps = 4\n",
      "03:34:42 [INFO] train episode 2273: winner = 1, steps = 6\n",
      "03:34:46 [INFO] train episode 2274: winner = -1, steps = 5\n",
      "03:34:49 [INFO] train episode 2275: winner = 1, steps = 6\n",
      "03:34:53 [INFO] train episode 2276: winner = 1, steps = 4\n",
      "03:34:54 [INFO] train episode 2277: winner = 1, steps = 4\n",
      "03:34:57 [INFO] train episode 2278: winner = 1, steps = 6\n",
      "03:35:02 [INFO] train episode 2279: winner = -1, steps = 7\n",
      "03:35:03 [INFO] train episode 2280: winner = -1, steps = 7\n",
      "03:35:04 [INFO] train episode 2281: winner = 1, steps = 4\n",
      "03:35:07 [INFO] train episode 2282: winner = 1, steps = 4\n",
      "03:35:18 [INFO] train episode 2283: winner = 0, steps = 8\n",
      "03:35:23 [INFO] train episode 2284: winner = 1, steps = 6\n",
      "03:35:23 [INFO] train episode 2285: winner = 1, steps = 4\n",
      "03:35:30 [INFO] train episode 2286: winner = 0, steps = 8\n",
      "03:35:33 [INFO] train episode 2287: winner = 1, steps = 6\n",
      "03:35:34 [INFO] train episode 2288: winner = 1, steps = 6\n",
      "03:35:35 [INFO] train episode 2289: winner = 1, steps = 4\n",
      "03:35:44 [INFO] train episode 2290: winner = 1, steps = 8\n",
      "03:35:47 [INFO] train episode 2291: winner = -1, steps = 7\n",
      "03:35:50 [INFO] train episode 2292: winner = 0, steps = 8\n",
      "03:35:56 [INFO] train episode 2293: winner = 0, steps = 8\n",
      "03:35:57 [INFO] train episode 2294: winner = -1, steps = 7\n",
      "03:35:58 [INFO] train episode 2295: winner = 1, steps = 6\n",
      "03:36:00 [INFO] train episode 2296: winner = 1, steps = 4\n",
      "03:36:02 [INFO] train episode 2297: winner = 1, steps = 4\n",
      "03:36:02 [INFO] train episode 2298: winner = 1, steps = 6\n",
      "03:36:05 [INFO] train episode 2299: winner = 1, steps = 6\n",
      "03:36:10 [INFO] train episode 2300: winner = 1, steps = 4\n",
      "03:36:11 [INFO] train episode 2301: winner = 1, steps = 6\n",
      "03:36:12 [INFO] train episode 2302: winner = 0, steps = 8\n",
      "03:36:12 [INFO] train episode 2303: winner = 0, steps = 8\n",
      "03:36:19 [INFO] train episode 2304: winner = 0, steps = 8\n",
      "03:36:22 [INFO] train episode 2305: winner = 0, steps = 8\n",
      "03:36:22 [INFO] train episode 2306: winner = 1, steps = 6\n",
      "03:36:23 [INFO] train episode 2307: winner = 1, steps = 6\n",
      "03:36:26 [INFO] train episode 2308: winner = 1, steps = 6\n",
      "03:36:26 [INFO] train episode 2309: winner = 1, steps = 4\n",
      "03:36:28 [INFO] train episode 2310: winner = 1, steps = 8\n",
      "03:36:31 [INFO] train episode 2311: winner = -1, steps = 7\n",
      "03:36:36 [INFO] train episode 2312: winner = 0, steps = 8\n",
      "03:36:41 [INFO] train episode 2313: winner = 1, steps = 6\n",
      "03:36:41 [INFO] train episode 2314: winner = 0, steps = 8\n",
      "03:36:42 [INFO] train episode 2315: winner = -1, steps = 5\n",
      "03:36:45 [INFO] train episode 2316: winner = 0, steps = 8\n",
      "03:36:45 [INFO] train episode 2317: winner = 1, steps = 6\n",
      "03:36:45 [INFO] train episode 2318: winner = 0, steps = 8\n",
      "03:36:46 [INFO] train episode 2319: winner = -1, steps = 5\n",
      "03:36:46 [INFO] train episode 2320: winner = 1, steps = 4\n",
      "03:36:50 [INFO] train episode 2321: winner = 0, steps = 8\n",
      "03:36:53 [INFO] train episode 2322: winner = 1, steps = 6\n",
      "03:36:53 [INFO] train episode 2323: winner = 1, steps = 6\n",
      "03:36:54 [INFO] train episode 2324: winner = -1, steps = 5\n",
      "03:36:54 [INFO] train episode 2325: winner = 1, steps = 4\n",
      "03:36:55 [INFO] train episode 2326: winner = 1, steps = 6\n",
      "03:36:55 [INFO] train episode 2327: winner = 0, steps = 8\n",
      "03:36:58 [INFO] train episode 2328: winner = 1, steps = 4\n",
      "03:36:59 [INFO] train episode 2329: winner = -1, steps = 5\n",
      "03:37:02 [INFO] train episode 2330: winner = 1, steps = 6\n",
      "03:37:02 [INFO] train episode 2331: winner = 1, steps = 4\n",
      "03:37:02 [INFO] train episode 2332: winner = 1, steps = 6\n",
      "03:37:03 [INFO] train episode 2333: winner = 1, steps = 6\n",
      "03:37:04 [INFO] train episode 2334: winner = 0, steps = 8\n",
      "03:37:06 [INFO] train episode 2335: winner = 1, steps = 6\n",
      "03:37:11 [INFO] train episode 2336: winner = 0, steps = 8\n",
      "03:37:17 [INFO] train episode 2337: winner = 0, steps = 8\n",
      "03:37:19 [INFO] train episode 2338: winner = 1, steps = 4\n",
      "03:37:19 [INFO] train episode 2339: winner = 0, steps = 8\n",
      "03:37:20 [INFO] train episode 2340: winner = 1, steps = 4\n",
      "03:37:23 [INFO] train episode 2341: winner = 0, steps = 8\n",
      "03:37:25 [INFO] train episode 2342: winner = 0, steps = 8\n",
      "03:37:25 [INFO] train episode 2343: winner = 1, steps = 6\n",
      "03:37:26 [INFO] train episode 2344: winner = 1, steps = 6\n",
      "03:37:27 [INFO] train episode 2345: winner = -1, steps = 5\n",
      "03:37:28 [INFO] train episode 2346: winner = 1, steps = 6\n",
      "03:37:28 [INFO] train episode 2347: winner = 1, steps = 4\n",
      "03:37:29 [INFO] train episode 2348: winner = -1, steps = 7\n",
      "03:37:30 [INFO] train episode 2349: winner = 1, steps = 4\n",
      "03:37:30 [INFO] train episode 2350: winner = 1, steps = 6\n",
      "03:37:31 [INFO] train episode 2351: winner = 1, steps = 4\n",
      "03:37:33 [INFO] train episode 2352: winner = -1, steps = 5\n",
      "03:37:34 [INFO] train episode 2353: winner = 1, steps = 4\n",
      "03:37:34 [INFO] train episode 2354: winner = -1, steps = 7\n",
      "03:37:34 [INFO] train episode 2355: winner = 1, steps = 4\n",
      "03:37:35 [INFO] train episode 2356: winner = 1, steps = 4\n",
      "03:37:37 [INFO] train episode 2357: winner = 1, steps = 4\n",
      "03:37:41 [INFO] train episode 2358: winner = 1, steps = 4\n",
      "03:37:42 [INFO] train episode 2359: winner = 0, steps = 8\n",
      "03:37:43 [INFO] train episode 2360: winner = -1, steps = 7\n",
      "03:37:44 [INFO] train episode 2361: winner = -1, steps = 5\n",
      "03:37:44 [INFO] train episode 2362: winner = 1, steps = 4\n",
      "03:37:45 [INFO] train episode 2363: winner = 1, steps = 6\n",
      "03:37:45 [INFO] train episode 2364: winner = 1, steps = 6\n",
      "03:37:47 [INFO] train episode 2365: winner = -1, steps = 7\n",
      "03:37:48 [INFO] train episode 2366: winner = 1, steps = 6\n",
      "03:37:48 [INFO] train episode 2367: winner = 0, steps = 8\n",
      "03:37:48 [INFO] train episode 2368: winner = 1, steps = 4\n",
      "03:37:48 [INFO] train episode 2369: winner = 1, steps = 6\n",
      "03:37:49 [INFO] train episode 2370: winner = 1, steps = 6\n",
      "03:37:50 [INFO] train episode 2371: winner = 0, steps = 8\n",
      "03:37:51 [INFO] train episode 2372: winner = 1, steps = 4\n",
      "03:37:52 [INFO] train episode 2373: winner = -1, steps = 7\n",
      "03:37:53 [INFO] train episode 2374: winner = 1, steps = 4\n",
      "03:37:53 [INFO] train episode 2375: winner = 1, steps = 6\n",
      "03:37:53 [INFO] train episode 2376: winner = 1, steps = 6\n",
      "03:37:55 [INFO] train episode 2377: winner = -1, steps = 5\n",
      "03:37:55 [INFO] train episode 2378: winner = 0, steps = 8\n",
      "03:37:58 [INFO] train episode 2379: winner = 1, steps = 6\n",
      "03:37:58 [INFO] test episode 2379:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:38:20 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "03:38:40 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "+o+\n",
      "+++\n",
      "03:38:54 [INFO] step 2：player 1, action (0, 0)\n",
      "o+x\n",
      "+o+\n",
      "+++\n",
      "03:38:56 [INFO] step 3：player -1, action (1, 2)\n",
      "o+x\n",
      "+ox\n",
      "+++\n",
      "03:38:56 [INFO] step 4：player 1, action (2, 2)\n",
      "o+x\n",
      "+ox\n",
      "++o\n",
      "03:38:56 [INFO] test episode 2379: winner = 1, steps = 4\n",
      "03:39:29 [INFO] train episode 2380: winner = 1, steps = 4\n",
      "03:40:10 [INFO] train episode 2381: winner = 1, steps = 8\n",
      "03:40:46 [INFO] train episode 2382: winner = 1, steps = 6\n",
      "03:41:18 [INFO] train episode 2383: winner = 1, steps = 6\n",
      "03:41:46 [INFO] train episode 2384: winner = 0, steps = 8\n",
      "03:41:47 [INFO] train episode 2385: winner = 1, steps = 4\n",
      "03:42:02 [INFO] train episode 2386: winner = -1, steps = 7\n",
      "03:42:14 [INFO] train episode 2387: winner = 1, steps = 4\n",
      "03:42:27 [INFO] train episode 2388: winner = -1, steps = 7\n",
      "03:42:27 [INFO] train episode 2389: winner = -1, steps = 5\n",
      "03:42:36 [INFO] train episode 2390: winner = -1, steps = 7\n",
      "03:42:58 [INFO] train episode 2391: winner = 0, steps = 8\n",
      "03:43:13 [INFO] train episode 2392: winner = 1, steps = 4\n",
      "03:43:29 [INFO] train episode 2393: winner = 1, steps = 4\n",
      "03:43:36 [INFO] train episode 2394: winner = 1, steps = 6\n",
      "03:43:44 [INFO] train episode 2395: winner = 0, steps = 8\n",
      "03:43:53 [INFO] train episode 2396: winner = -1, steps = 7\n",
      "03:44:00 [INFO] train episode 2397: winner = 0, steps = 8\n",
      "03:44:13 [INFO] train episode 2398: winner = 0, steps = 8\n",
      "03:44:18 [INFO] train episode 2399: winner = 1, steps = 6\n",
      "03:44:28 [INFO] train episode 2400: winner = 1, steps = 6\n",
      "03:44:32 [INFO] train episode 2401: winner = 0, steps = 8\n",
      "03:44:35 [INFO] train episode 2402: winner = 0, steps = 8\n",
      "03:44:41 [INFO] train episode 2403: winner = 1, steps = 6\n",
      "03:44:43 [INFO] train episode 2404: winner = -1, steps = 5\n",
      "03:44:46 [INFO] train episode 2405: winner = -1, steps = 5\n",
      "03:44:49 [INFO] train episode 2406: winner = 0, steps = 8\n",
      "03:44:54 [INFO] train episode 2407: winner = -1, steps = 5\n",
      "03:44:59 [INFO] train episode 2408: winner = 1, steps = 6\n",
      "03:45:02 [INFO] train episode 2409: winner = 1, steps = 8\n",
      "03:45:04 [INFO] train episode 2410: winner = 1, steps = 6\n",
      "03:45:07 [INFO] train episode 2411: winner = 1, steps = 6\n",
      "03:45:10 [INFO] train episode 2412: winner = 1, steps = 4\n",
      "03:45:19 [INFO] train episode 2413: winner = 0, steps = 8\n",
      "03:45:21 [INFO] train episode 2414: winner = 1, steps = 4\n",
      "03:45:25 [INFO] train episode 2415: winner = 1, steps = 4\n",
      "03:45:26 [INFO] train episode 2416: winner = 1, steps = 4\n",
      "03:45:28 [INFO] train episode 2417: winner = 1, steps = 4\n",
      "03:45:39 [INFO] train episode 2418: winner = 1, steps = 6\n",
      "03:45:39 [INFO] train episode 2419: winner = 1, steps = 6\n",
      "03:45:43 [INFO] train episode 2420: winner = 1, steps = 6\n",
      "03:45:48 [INFO] train episode 2421: winner = 1, steps = 4\n",
      "03:45:52 [INFO] train episode 2422: winner = 1, steps = 6\n",
      "03:45:53 [INFO] train episode 2423: winner = 1, steps = 4\n",
      "03:45:58 [INFO] train episode 2424: winner = 0, steps = 8\n",
      "03:46:01 [INFO] train episode 2425: winner = 1, steps = 4\n",
      "03:46:09 [INFO] train episode 2426: winner = 0, steps = 8\n",
      "03:46:15 [INFO] train episode 2427: winner = 1, steps = 4\n",
      "03:46:15 [INFO] train episode 2428: winner = 1, steps = 6\n",
      "03:46:17 [INFO] train episode 2429: winner = 0, steps = 8\n",
      "03:46:19 [INFO] train episode 2430: winner = 1, steps = 6\n",
      "03:46:20 [INFO] train episode 2431: winner = 1, steps = 4\n",
      "03:46:24 [INFO] train episode 2432: winner = 0, steps = 8\n",
      "03:46:30 [INFO] train episode 2433: winner = -1, steps = 7\n",
      "03:46:30 [INFO] train episode 2434: winner = 1, steps = 6\n",
      "03:46:31 [INFO] train episode 2435: winner = 1, steps = 6\n",
      "03:46:34 [INFO] train episode 2436: winner = 1, steps = 4\n",
      "03:46:34 [INFO] train episode 2437: winner = 0, steps = 8\n",
      "03:46:35 [INFO] train episode 2438: winner = 1, steps = 4\n",
      "03:46:37 [INFO] train episode 2439: winner = -1, steps = 5\n",
      "03:46:39 [INFO] train episode 2440: winner = 1, steps = 6\n",
      "03:46:39 [INFO] train episode 2441: winner = 1, steps = 6\n",
      "03:46:43 [INFO] train episode 2442: winner = 1, steps = 6\n",
      "03:46:44 [INFO] train episode 2443: winner = 1, steps = 6\n",
      "03:46:47 [INFO] train episode 2444: winner = 1, steps = 4\n",
      "03:46:48 [INFO] train episode 2445: winner = 1, steps = 6\n",
      "03:46:49 [INFO] train episode 2446: winner = 1, steps = 6\n",
      "03:46:50 [INFO] train episode 2447: winner = 0, steps = 8\n",
      "03:46:53 [INFO] train episode 2448: winner = 1, steps = 6\n",
      "03:46:53 [INFO] train episode 2449: winner = 1, steps = 4\n",
      "03:46:56 [INFO] train episode 2450: winner = -1, steps = 5\n",
      "03:47:04 [INFO] train episode 2451: winner = -1, steps = 7\n",
      "03:47:09 [INFO] train episode 2452: winner = 1, steps = 8\n",
      "03:47:10 [INFO] train episode 2453: winner = 1, steps = 6\n",
      "03:47:11 [INFO] train episode 2454: winner = 0, steps = 8\n",
      "03:47:11 [INFO] train episode 2455: winner = 1, steps = 4\n",
      "03:47:12 [INFO] train episode 2456: winner = 1, steps = 4\n",
      "03:47:14 [INFO] train episode 2457: winner = 1, steps = 6\n",
      "03:47:16 [INFO] train episode 2458: winner = 1, steps = 4\n",
      "03:47:22 [INFO] train episode 2459: winner = 1, steps = 6\n",
      "03:47:22 [INFO] train episode 2460: winner = 1, steps = 6\n",
      "03:47:23 [INFO] train episode 2461: winner = 0, steps = 8\n",
      "03:47:24 [INFO] train episode 2462: winner = 1, steps = 6\n",
      "03:47:28 [INFO] train episode 2463: winner = 1, steps = 6\n",
      "03:47:28 [INFO] train episode 2464: winner = 1, steps = 4\n",
      "03:47:29 [INFO] train episode 2465: winner = 0, steps = 8\n",
      "03:47:31 [INFO] train episode 2466: winner = 0, steps = 8\n",
      "03:47:38 [INFO] train episode 2467: winner = 0, steps = 8\n",
      "03:47:39 [INFO] train episode 2468: winner = 0, steps = 8\n",
      "03:47:39 [INFO] train episode 2469: winner = -1, steps = 7\n",
      "03:47:41 [INFO] train episode 2470: winner = 1, steps = 6\n",
      "03:47:41 [INFO] train episode 2471: winner = 0, steps = 8\n",
      "03:47:41 [INFO] train episode 2472: winner = 1, steps = 4\n",
      "03:47:43 [INFO] train episode 2473: winner = -1, steps = 5\n",
      "03:47:51 [INFO] train episode 2474: winner = 1, steps = 6\n",
      "03:47:52 [INFO] train episode 2475: winner = 1, steps = 4\n",
      "03:47:53 [INFO] train episode 2476: winner = 0, steps = 8\n",
      "03:47:54 [INFO] train episode 2477: winner = 1, steps = 4\n",
      "03:47:55 [INFO] train episode 2478: winner = 1, steps = 6\n",
      "03:47:55 [INFO] train episode 2479: winner = 1, steps = 4\n",
      "03:47:56 [INFO] train episode 2480: winner = 1, steps = 6\n",
      "03:47:56 [INFO] train episode 2481: winner = 0, steps = 8\n",
      "03:47:58 [INFO] train episode 2482: winner = 1, steps = 6\n",
      "03:48:02 [INFO] train episode 2483: winner = -1, steps = 5\n",
      "03:48:03 [INFO] train episode 2484: winner = 1, steps = 6\n",
      "03:48:05 [INFO] train episode 2485: winner = 1, steps = 6\n",
      "03:48:11 [INFO] train episode 2486: winner = 0, steps = 8\n",
      "03:48:12 [INFO] train episode 2487: winner = 0, steps = 8\n",
      "03:48:12 [INFO] train episode 2488: winner = -1, steps = 5\n",
      "03:48:14 [INFO] train episode 2489: winner = 1, steps = 4\n",
      "03:48:15 [INFO] train episode 2490: winner = 0, steps = 8\n",
      "03:48:15 [INFO] train episode 2491: winner = 1, steps = 4\n",
      "03:48:15 [INFO] train episode 2492: winner = 1, steps = 6\n",
      "03:48:16 [INFO] train episode 2493: winner = 1, steps = 6\n",
      "03:48:16 [INFO] train episode 2494: winner = 1, steps = 4\n",
      "03:48:19 [INFO] train episode 2495: winner = 0, steps = 8\n",
      "03:48:20 [INFO] train episode 2496: winner = 0, steps = 8\n",
      "03:48:23 [INFO] train episode 2497: winner = -1, steps = 7\n",
      "03:48:25 [INFO] train episode 2498: winner = 1, steps = 6\n",
      "03:48:27 [INFO] train episode 2499: winner = 0, steps = 8\n",
      "03:48:31 [INFO] train episode 2500: winner = 1, steps = 6\n",
      "03:48:32 [INFO] train episode 2501: winner = 1, steps = 4\n",
      "03:48:34 [INFO] train episode 2502: winner = 0, steps = 8\n",
      "03:48:35 [INFO] train episode 2503: winner = -1, steps = 5\n",
      "03:48:37 [INFO] train episode 2504: winner = 0, steps = 8\n",
      "03:48:38 [INFO] train episode 2505: winner = 1, steps = 6\n",
      "03:48:39 [INFO] train episode 2506: winner = -1, steps = 5\n",
      "03:48:39 [INFO] train episode 2507: winner = 1, steps = 6\n",
      "03:48:40 [INFO] train episode 2508: winner = 1, steps = 4\n",
      "03:48:40 [INFO] train episode 2509: winner = 1, steps = 6\n",
      "03:48:42 [INFO] train episode 2510: winner = 0, steps = 8\n",
      "03:48:47 [INFO] train episode 2511: winner = 0, steps = 8\n",
      "03:48:48 [INFO] train episode 2512: winner = -1, steps = 7\n",
      "03:48:49 [INFO] train episode 2513: winner = -1, steps = 5\n",
      "03:48:52 [INFO] train episode 2514: winner = 1, steps = 6\n",
      "03:48:53 [INFO] train episode 2515: winner = 1, steps = 6\n",
      "03:48:53 [INFO] train episode 2516: winner = 1, steps = 4\n",
      "03:48:53 [INFO] train episode 2517: winner = 1, steps = 6\n",
      "03:48:57 [INFO] train episode 2518: winner = 0, steps = 8\n",
      "03:48:57 [INFO] train episode 2519: winner = 0, steps = 8\n",
      "03:48:57 [INFO] train episode 2520: winner = 0, steps = 8\n",
      "03:48:59 [INFO] train episode 2521: winner = 1, steps = 8\n",
      "03:48:59 [INFO] test episode 2521:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "03:49:20 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "03:49:39 [INFO] step 1：player -1, action (0, 0)\n",
      "x++\n",
      "+++\n",
      "+o+\n",
      "03:49:52 [INFO] step 2：player 1, action (2, 2)\n",
      "x++\n",
      "+++\n",
      "+oo\n",
      "03:49:54 [INFO] step 3：player -1, action (2, 0)\n",
      "x++\n",
      "+++\n",
      "xoo\n",
      "03:49:54 [INFO] step 4：player 1, action (0, 2)\n",
      "x+o\n",
      "+++\n",
      "xoo\n",
      "03:49:55 [INFO] step 5：player -1, action (1, 0)\n",
      "x+o\n",
      "x++\n",
      "xoo\n",
      "03:49:55 [INFO] test episode 2521: winner = -1, steps = 5\n",
      "03:50:30 [INFO] train episode 2522: winner = 1, steps = 8\n",
      "03:51:13 [INFO] train episode 2523: winner = 1, steps = 6\n",
      "03:51:30 [INFO] train episode 2524: winner = -1, steps = 5\n",
      "03:52:00 [INFO] train episode 2525: winner = 1, steps = 4\n",
      "03:52:31 [INFO] train episode 2526: winner = 0, steps = 8\n",
      "03:52:55 [INFO] train episode 2527: winner = -1, steps = 7\n",
      "03:53:19 [INFO] train episode 2528: winner = 1, steps = 6\n",
      "03:53:32 [INFO] train episode 2529: winner = 0, steps = 8\n",
      "03:53:44 [INFO] train episode 2530: winner = 0, steps = 8\n",
      "03:54:06 [INFO] train episode 2531: winner = -1, steps = 7\n",
      "03:54:15 [INFO] train episode 2532: winner = 0, steps = 8\n",
      "03:54:29 [INFO] train episode 2533: winner = 1, steps = 8\n",
      "03:54:39 [INFO] train episode 2534: winner = 1, steps = 4\n",
      "03:55:00 [INFO] train episode 2535: winner = 1, steps = 6\n",
      "03:55:08 [INFO] train episode 2536: winner = 1, steps = 4\n",
      "03:55:16 [INFO] train episode 2537: winner = -1, steps = 5\n",
      "03:55:18 [INFO] train episode 2538: winner = 1, steps = 4\n",
      "03:55:26 [INFO] train episode 2539: winner = -1, steps = 7\n",
      "03:55:35 [INFO] train episode 2540: winner = -1, steps = 7\n",
      "03:55:43 [INFO] train episode 2541: winner = 0, steps = 8\n",
      "03:55:47 [INFO] train episode 2542: winner = 1, steps = 6\n",
      "03:55:53 [INFO] train episode 2543: winner = 1, steps = 6\n",
      "03:55:57 [INFO] train episode 2544: winner = 0, steps = 8\n",
      "03:55:58 [INFO] train episode 2545: winner = 1, steps = 6\n",
      "03:56:10 [INFO] train episode 2546: winner = 1, steps = 4\n",
      "03:56:14 [INFO] train episode 2547: winner = 1, steps = 4\n",
      "03:56:18 [INFO] train episode 2548: winner = 0, steps = 8\n",
      "03:56:22 [INFO] train episode 2549: winner = 1, steps = 4\n",
      "03:56:25 [INFO] train episode 2550: winner = 1, steps = 4\n",
      "03:56:27 [INFO] train episode 2551: winner = 1, steps = 4\n",
      "03:56:34 [INFO] train episode 2552: winner = 0, steps = 8\n",
      "03:56:37 [INFO] train episode 2553: winner = 1, steps = 8\n",
      "03:56:40 [INFO] train episode 2554: winner = 1, steps = 4\n",
      "03:56:42 [INFO] train episode 2555: winner = -1, steps = 7\n",
      "03:56:43 [INFO] train episode 2556: winner = 1, steps = 4\n",
      "03:56:48 [INFO] train episode 2557: winner = -1, steps = 7\n",
      "03:56:52 [INFO] train episode 2558: winner = 1, steps = 6\n",
      "03:56:53 [INFO] train episode 2559: winner = -1, steps = 7\n",
      "03:56:56 [INFO] train episode 2560: winner = 0, steps = 8\n",
      "03:57:00 [INFO] train episode 2561: winner = 0, steps = 8\n",
      "03:57:00 [INFO] train episode 2562: winner = 1, steps = 4\n",
      "03:57:10 [INFO] train episode 2563: winner = 1, steps = 6\n",
      "03:57:14 [INFO] train episode 2564: winner = 1, steps = 6\n",
      "03:57:16 [INFO] train episode 2565: winner = 1, steps = 6\n",
      "03:57:19 [INFO] train episode 2566: winner = 1, steps = 6\n",
      "03:57:19 [INFO] train episode 2567: winner = 1, steps = 6\n",
      "03:57:24 [INFO] train episode 2568: winner = 0, steps = 8\n",
      "03:57:25 [INFO] train episode 2569: winner = -1, steps = 5\n",
      "03:57:28 [INFO] train episode 2570: winner = 1, steps = 6\n",
      "03:57:30 [INFO] train episode 2571: winner = -1, steps = 7\n",
      "03:57:31 [INFO] train episode 2572: winner = 1, steps = 6\n",
      "03:57:31 [INFO] train episode 2573: winner = 1, steps = 4\n",
      "03:57:31 [INFO] train episode 2574: winner = 1, steps = 4\n",
      "03:57:32 [INFO] train episode 2575: winner = 1, steps = 4\n",
      "03:57:32 [INFO] train episode 2576: winner = 1, steps = 6\n",
      "03:57:35 [INFO] train episode 2577: winner = -1, steps = 7\n",
      "03:57:39 [INFO] train episode 2578: winner = 1, steps = 4\n",
      "03:57:44 [INFO] train episode 2579: winner = 0, steps = 8\n",
      "03:57:49 [INFO] train episode 2580: winner = -1, steps = 7\n",
      "03:57:53 [INFO] train episode 2581: winner = 1, steps = 6\n",
      "03:57:55 [INFO] train episode 2582: winner = 1, steps = 6\n",
      "03:57:57 [INFO] train episode 2583: winner = 0, steps = 8\n",
      "03:57:59 [INFO] train episode 2584: winner = 1, steps = 6\n",
      "03:58:00 [INFO] train episode 2585: winner = 1, steps = 6\n",
      "03:58:00 [INFO] train episode 2586: winner = 1, steps = 4\n",
      "03:58:00 [INFO] train episode 2587: winner = 1, steps = 4\n",
      "03:58:02 [INFO] train episode 2588: winner = 1, steps = 4\n",
      "03:58:04 [INFO] train episode 2589: winner = 1, steps = 6\n",
      "03:58:07 [INFO] train episode 2590: winner = 1, steps = 8\n",
      "03:58:08 [INFO] train episode 2591: winner = 1, steps = 4\n",
      "03:58:09 [INFO] train episode 2592: winner = 1, steps = 6\n",
      "03:58:15 [INFO] train episode 2593: winner = 0, steps = 8\n",
      "03:58:15 [INFO] train episode 2594: winner = 1, steps = 4\n",
      "03:58:18 [INFO] train episode 2595: winner = 1, steps = 6\n",
      "03:58:18 [INFO] train episode 2596: winner = 1, steps = 6\n",
      "03:58:20 [INFO] train episode 2597: winner = 1, steps = 4\n",
      "03:58:22 [INFO] train episode 2598: winner = 1, steps = 6\n",
      "03:58:23 [INFO] train episode 2599: winner = 1, steps = 6\n",
      "03:58:24 [INFO] train episode 2600: winner = 1, steps = 6\n",
      "03:58:24 [INFO] train episode 2601: winner = -1, steps = 5\n",
      "03:58:26 [INFO] train episode 2602: winner = 0, steps = 8\n",
      "03:58:32 [INFO] train episode 2603: winner = 0, steps = 8\n",
      "03:58:34 [INFO] train episode 2604: winner = 0, steps = 8\n",
      "03:58:37 [INFO] train episode 2605: winner = 0, steps = 8\n",
      "03:58:37 [INFO] train episode 2606: winner = -1, steps = 5\n",
      "03:58:38 [INFO] train episode 2607: winner = 0, steps = 8\n",
      "03:58:39 [INFO] train episode 2608: winner = -1, steps = 5\n",
      "03:58:39 [INFO] train episode 2609: winner = 1, steps = 4\n",
      "03:58:45 [INFO] train episode 2610: winner = 0, steps = 8\n",
      "03:58:47 [INFO] train episode 2611: winner = -1, steps = 7\n",
      "03:58:48 [INFO] train episode 2612: winner = 1, steps = 6\n",
      "03:58:48 [INFO] train episode 2613: winner = 1, steps = 6\n",
      "03:58:50 [INFO] train episode 2614: winner = 0, steps = 8\n",
      "03:58:51 [INFO] train episode 2615: winner = 1, steps = 6\n",
      "03:58:52 [INFO] train episode 2616: winner = 0, steps = 8\n",
      "03:58:53 [INFO] train episode 2617: winner = 1, steps = 6\n",
      "03:58:55 [INFO] train episode 2618: winner = 0, steps = 8\n",
      "03:58:57 [INFO] train episode 2619: winner = 1, steps = 6\n",
      "03:58:57 [INFO] train episode 2620: winner = 1, steps = 6\n",
      "03:59:03 [INFO] train episode 2621: winner = 1, steps = 6\n",
      "03:59:04 [INFO] train episode 2622: winner = 1, steps = 6\n",
      "03:59:05 [INFO] train episode 2623: winner = 1, steps = 6\n",
      "03:59:05 [INFO] train episode 2624: winner = 1, steps = 6\n",
      "03:59:06 [INFO] train episode 2625: winner = 1, steps = 6\n",
      "03:59:06 [INFO] train episode 2626: winner = 1, steps = 4\n",
      "03:59:07 [INFO] train episode 2627: winner = 1, steps = 6\n",
      "03:59:08 [INFO] train episode 2628: winner = -1, steps = 5\n",
      "03:59:10 [INFO] train episode 2629: winner = 1, steps = 4\n",
      "03:59:11 [INFO] train episode 2630: winner = 0, steps = 8\n",
      "03:59:12 [INFO] train episode 2631: winner = 1, steps = 6\n",
      "03:59:14 [INFO] train episode 2632: winner = 0, steps = 8\n",
      "03:59:15 [INFO] train episode 2633: winner = 1, steps = 4\n",
      "03:59:16 [INFO] train episode 2634: winner = 0, steps = 8\n",
      "03:59:17 [INFO] train episode 2635: winner = 1, steps = 4\n",
      "03:59:22 [INFO] train episode 2636: winner = 0, steps = 8\n",
      "03:59:22 [INFO] train episode 2637: winner = 1, steps = 6\n",
      "03:59:23 [INFO] train episode 2638: winner = 1, steps = 6\n",
      "03:59:26 [INFO] train episode 2639: winner = 0, steps = 8\n",
      "03:59:26 [INFO] train episode 2640: winner = 1, steps = 6\n",
      "03:59:28 [INFO] train episode 2641: winner = 1, steps = 6\n",
      "03:59:28 [INFO] train episode 2642: winner = 1, steps = 4\n",
      "03:59:28 [INFO] train episode 2643: winner = 1, steps = 4\n",
      "03:59:29 [INFO] train episode 2644: winner = 1, steps = 6\n",
      "03:59:30 [INFO] train episode 2645: winner = 1, steps = 6\n",
      "03:59:30 [INFO] train episode 2646: winner = 1, steps = 6\n",
      "03:59:33 [INFO] train episode 2647: winner = 0, steps = 8\n",
      "03:59:37 [INFO] train episode 2648: winner = 1, steps = 8\n",
      "03:59:38 [INFO] train episode 2649: winner = 1, steps = 6\n",
      "03:59:38 [INFO] train episode 2650: winner = 0, steps = 8\n",
      "03:59:39 [INFO] train episode 2651: winner = 1, steps = 4\n",
      "03:59:39 [INFO] train episode 2652: winner = 1, steps = 6\n",
      "03:59:41 [INFO] train episode 2653: winner = 0, steps = 8\n",
      "03:59:42 [INFO] train episode 2654: winner = 1, steps = 6\n",
      "03:59:45 [INFO] train episode 2655: winner = -1, steps = 5\n",
      "03:59:46 [INFO] train episode 2656: winner = 1, steps = 6\n",
      "03:59:47 [INFO] train episode 2657: winner = 0, steps = 8\n",
      "03:59:47 [INFO] train episode 2658: winner = 1, steps = 4\n",
      "03:59:48 [INFO] train episode 2659: winner = -1, steps = 5\n",
      "03:59:51 [INFO] train episode 2660: winner = 0, steps = 8\n",
      "03:59:54 [INFO] train episode 2661: winner = 0, steps = 8\n",
      "03:59:56 [INFO] train episode 2662: winner = 1, steps = 4\n",
      "03:59:56 [INFO] test episode 2662:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:00:18 [INFO] step 0：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "o++\n",
      "04:00:35 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "+++\n",
      "o++\n",
      "04:00:47 [INFO] step 2：player 1, action (1, 0)\n",
      "++x\n",
      "o++\n",
      "o++\n",
      "04:00:50 [INFO] step 3：player -1, action (0, 0)\n",
      "x+x\n",
      "o++\n",
      "o++\n",
      "04:00:50 [INFO] step 4：player 1, action (2, 2)\n",
      "x+x\n",
      "o++\n",
      "o+o\n",
      "04:00:51 [INFO] step 5：player -1, action (0, 1)\n",
      "xxx\n",
      "o++\n",
      "o+o\n",
      "04:00:51 [INFO] test episode 2662: winner = -1, steps = 5\n",
      "04:01:05 [INFO] train episode 2663: winner = 1, steps = 6\n",
      "04:01:33 [INFO] train episode 2664: winner = 1, steps = 6\n",
      "04:02:02 [INFO] train episode 2665: winner = 0, steps = 8\n",
      "04:02:24 [INFO] train episode 2666: winner = -1, steps = 5\n",
      "04:02:46 [INFO] train episode 2667: winner = 0, steps = 8\n",
      "04:03:04 [INFO] train episode 2668: winner = 1, steps = 4\n",
      "04:03:15 [INFO] train episode 2669: winner = 0, steps = 8\n",
      "04:03:45 [INFO] train episode 2670: winner = 0, steps = 8\n",
      "04:04:12 [INFO] train episode 2671: winner = 0, steps = 8\n",
      "04:04:24 [INFO] train episode 2672: winner = 0, steps = 8\n",
      "04:04:49 [INFO] train episode 2673: winner = 0, steps = 8\n",
      "04:05:11 [INFO] train episode 2674: winner = 1, steps = 6\n",
      "04:05:25 [INFO] train episode 2675: winner = 0, steps = 8\n",
      "04:05:26 [INFO] train episode 2676: winner = -1, steps = 5\n",
      "04:05:27 [INFO] train episode 2677: winner = 1, steps = 4\n",
      "04:05:39 [INFO] train episode 2678: winner = 1, steps = 8\n",
      "04:05:50 [INFO] train episode 2679: winner = 0, steps = 8\n",
      "04:06:02 [INFO] train episode 2680: winner = -1, steps = 7\n",
      "04:06:08 [INFO] train episode 2681: winner = 1, steps = 6\n",
      "04:06:08 [INFO] train episode 2682: winner = 0, steps = 8\n",
      "04:06:19 [INFO] train episode 2683: winner = -1, steps = 7\n",
      "04:06:21 [INFO] train episode 2684: winner = -1, steps = 5\n",
      "04:06:23 [INFO] train episode 2685: winner = -1, steps = 5\n",
      "04:06:25 [INFO] train episode 2686: winner = -1, steps = 7\n",
      "04:06:34 [INFO] train episode 2687: winner = 1, steps = 4\n",
      "04:06:49 [INFO] train episode 2688: winner = 0, steps = 8\n",
      "04:06:51 [INFO] train episode 2689: winner = -1, steps = 7\n",
      "04:06:53 [INFO] train episode 2690: winner = 1, steps = 6\n",
      "04:06:58 [INFO] train episode 2691: winner = 1, steps = 6\n",
      "04:06:59 [INFO] train episode 2692: winner = -1, steps = 7\n",
      "04:07:20 [INFO] train episode 2693: winner = 1, steps = 4\n",
      "04:07:33 [INFO] train episode 2694: winner = 0, steps = 8\n",
      "04:07:38 [INFO] train episode 2695: winner = 0, steps = 8\n",
      "04:07:45 [INFO] train episode 2696: winner = 1, steps = 6\n",
      "04:07:48 [INFO] train episode 2697: winner = 1, steps = 4\n",
      "04:07:53 [INFO] train episode 2698: winner = 0, steps = 8\n",
      "04:07:57 [INFO] train episode 2699: winner = -1, steps = 5\n",
      "04:08:03 [INFO] train episode 2700: winner = 0, steps = 8\n",
      "04:08:04 [INFO] train episode 2701: winner = 1, steps = 6\n",
      "04:08:10 [INFO] train episode 2702: winner = 0, steps = 8\n",
      "04:08:12 [INFO] train episode 2703: winner = 0, steps = 8\n",
      "04:08:15 [INFO] train episode 2704: winner = 1, steps = 6\n",
      "04:08:17 [INFO] train episode 2705: winner = 1, steps = 6\n",
      "04:08:20 [INFO] train episode 2706: winner = 0, steps = 8\n",
      "04:08:26 [INFO] train episode 2707: winner = 0, steps = 8\n",
      "04:08:30 [INFO] train episode 2708: winner = -1, steps = 7\n",
      "04:08:33 [INFO] train episode 2709: winner = 0, steps = 8\n",
      "04:08:35 [INFO] train episode 2710: winner = 1, steps = 6\n",
      "04:08:37 [INFO] train episode 2711: winner = 1, steps = 6\n",
      "04:08:40 [INFO] train episode 2712: winner = 1, steps = 8\n",
      "04:08:43 [INFO] train episode 2713: winner = 1, steps = 6\n",
      "04:08:45 [INFO] train episode 2714: winner = 0, steps = 8\n",
      "04:08:54 [INFO] train episode 2715: winner = 1, steps = 6\n",
      "04:08:58 [INFO] train episode 2716: winner = 1, steps = 6\n",
      "04:09:01 [INFO] train episode 2717: winner = 0, steps = 8\n",
      "04:09:03 [INFO] train episode 2718: winner = 1, steps = 6\n",
      "04:09:04 [INFO] train episode 2719: winner = 1, steps = 6\n",
      "04:09:05 [INFO] train episode 2720: winner = -1, steps = 7\n",
      "04:09:06 [INFO] train episode 2721: winner = 1, steps = 6\n",
      "04:09:09 [INFO] train episode 2722: winner = 0, steps = 8\n",
      "04:09:13 [INFO] train episode 2723: winner = 1, steps = 4\n",
      "04:09:13 [INFO] train episode 2724: winner = 1, steps = 6\n",
      "04:09:14 [INFO] train episode 2725: winner = 1, steps = 6\n",
      "04:09:19 [INFO] train episode 2726: winner = 1, steps = 4\n",
      "04:09:19 [INFO] train episode 2727: winner = 1, steps = 6\n",
      "04:09:20 [INFO] train episode 2728: winner = 0, steps = 8\n",
      "04:09:21 [INFO] train episode 2729: winner = 1, steps = 4\n",
      "04:09:23 [INFO] train episode 2730: winner = 0, steps = 8\n",
      "04:09:27 [INFO] train episode 2731: winner = 0, steps = 8\n",
      "04:09:27 [INFO] train episode 2732: winner = 1, steps = 6\n",
      "04:09:35 [INFO] train episode 2733: winner = 0, steps = 8\n",
      "04:09:36 [INFO] train episode 2734: winner = 1, steps = 4\n",
      "04:09:36 [INFO] train episode 2735: winner = 1, steps = 4\n",
      "04:09:36 [INFO] train episode 2736: winner = 0, steps = 8\n",
      "04:09:43 [INFO] train episode 2737: winner = 1, steps = 6\n",
      "04:09:43 [INFO] train episode 2738: winner = 1, steps = 4\n",
      "04:09:44 [INFO] train episode 2739: winner = 1, steps = 8\n",
      "04:09:47 [INFO] train episode 2740: winner = 0, steps = 8\n",
      "04:09:48 [INFO] train episode 2741: winner = -1, steps = 7\n",
      "04:09:50 [INFO] train episode 2742: winner = 0, steps = 8\n",
      "04:09:51 [INFO] train episode 2743: winner = 0, steps = 8\n",
      "04:09:53 [INFO] train episode 2744: winner = 1, steps = 4\n",
      "04:09:55 [INFO] train episode 2745: winner = 1, steps = 6\n",
      "04:09:57 [INFO] train episode 2746: winner = 1, steps = 6\n",
      "04:10:02 [INFO] train episode 2747: winner = 0, steps = 8\n",
      "04:10:04 [INFO] train episode 2748: winner = 1, steps = 6\n",
      "04:10:05 [INFO] train episode 2749: winner = 0, steps = 8\n",
      "04:10:05 [INFO] train episode 2750: winner = 1, steps = 6\n",
      "04:10:08 [INFO] train episode 2751: winner = 1, steps = 6\n",
      "04:10:10 [INFO] train episode 2752: winner = -1, steps = 5\n",
      "04:10:12 [INFO] train episode 2753: winner = 0, steps = 8\n",
      "04:10:14 [INFO] train episode 2754: winner = -1, steps = 7\n",
      "04:10:14 [INFO] train episode 2755: winner = 0, steps = 8\n",
      "04:10:17 [INFO] train episode 2756: winner = 1, steps = 4\n",
      "04:10:17 [INFO] train episode 2757: winner = 1, steps = 6\n",
      "04:10:18 [INFO] train episode 2758: winner = 1, steps = 6\n",
      "04:10:21 [INFO] train episode 2759: winner = 0, steps = 8\n",
      "04:10:22 [INFO] train episode 2760: winner = 0, steps = 8\n",
      "04:10:24 [INFO] train episode 2761: winner = 1, steps = 4\n",
      "04:10:25 [INFO] train episode 2762: winner = 1, steps = 6\n",
      "04:10:28 [INFO] train episode 2763: winner = 0, steps = 8\n",
      "04:10:28 [INFO] train episode 2764: winner = 0, steps = 8\n",
      "04:10:29 [INFO] train episode 2765: winner = -1, steps = 5\n",
      "04:10:30 [INFO] train episode 2766: winner = 1, steps = 4\n",
      "04:10:31 [INFO] train episode 2767: winner = 1, steps = 6\n",
      "04:10:34 [INFO] train episode 2768: winner = -1, steps = 7\n",
      "04:10:35 [INFO] train episode 2769: winner = 0, steps = 8\n",
      "04:10:37 [INFO] train episode 2770: winner = -1, steps = 7\n",
      "04:10:38 [INFO] train episode 2771: winner = 1, steps = 6\n",
      "04:10:41 [INFO] train episode 2772: winner = 0, steps = 8\n",
      "04:10:41 [INFO] train episode 2773: winner = 0, steps = 8\n",
      "04:10:42 [INFO] train episode 2774: winner = 1, steps = 6\n",
      "04:10:42 [INFO] train episode 2775: winner = 1, steps = 6\n",
      "04:10:43 [INFO] train episode 2776: winner = 1, steps = 6\n",
      "04:10:43 [INFO] train episode 2777: winner = 1, steps = 4\n",
      "04:10:46 [INFO] train episode 2778: winner = 0, steps = 8\n",
      "04:10:47 [INFO] train episode 2779: winner = -1, steps = 7\n",
      "04:10:48 [INFO] train episode 2780: winner = -1, steps = 5\n",
      "04:10:48 [INFO] train episode 2781: winner = 1, steps = 6\n",
      "04:10:48 [INFO] train episode 2782: winner = 0, steps = 8\n",
      "04:10:49 [INFO] train episode 2783: winner = -1, steps = 7\n",
      "04:10:49 [INFO] train episode 2784: winner = -1, steps = 5\n",
      "04:10:51 [INFO] train episode 2785: winner = 1, steps = 4\n",
      "04:10:54 [INFO] train episode 2786: winner = 1, steps = 8\n",
      "04:10:54 [INFO] train episode 2787: winner = -1, steps = 7\n",
      "04:10:57 [INFO] train episode 2788: winner = 0, steps = 8\n",
      "04:10:57 [INFO] train episode 2789: winner = 1, steps = 6\n",
      "04:10:57 [INFO] train episode 2790: winner = -1, steps = 5\n",
      "04:10:57 [INFO] train episode 2791: winner = 1, steps = 4\n",
      "04:10:59 [INFO] train episode 2792: winner = 1, steps = 6\n",
      "04:11:01 [INFO] train episode 2793: winner = 0, steps = 8\n",
      "04:11:02 [INFO] train episode 2794: winner = 0, steps = 8\n",
      "04:11:02 [INFO] train episode 2795: winner = 1, steps = 4\n",
      "04:11:06 [INFO] train episode 2796: winner = 1, steps = 8\n",
      "04:11:06 [INFO] test episode 2796:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:11:28 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "04:11:47 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+o+\n",
      "+++\n",
      "04:12:02 [INFO] step 2：player 1, action (0, 0)\n",
      "ox+\n",
      "+o+\n",
      "+++\n",
      "04:12:04 [INFO] step 3：player -1, action (1, 2)\n",
      "ox+\n",
      "+ox\n",
      "+++\n",
      "04:12:06 [INFO] step 4：player 1, action (2, 1)\n",
      "ox+\n",
      "+ox\n",
      "+o+\n",
      "04:12:08 [INFO] step 5：player -1, action (2, 2)\n",
      "ox+\n",
      "+ox\n",
      "+ox\n",
      "04:12:08 [INFO] step 6：player 1, action (0, 2)\n",
      "oxo\n",
      "+ox\n",
      "+ox\n",
      "04:12:08 [INFO] step 7：player -1, action (2, 0)\n",
      "oxo\n",
      "+ox\n",
      "xox\n",
      "04:12:08 [INFO] step 8：player 1, action (1, 0)\n",
      "oxo\n",
      "oox\n",
      "xox\n",
      "04:12:08 [INFO] test episode 2796: winner = 0, steps = 8\n",
      "04:12:23 [INFO] train episode 2797: winner = 1, steps = 6\n",
      "04:12:50 [INFO] train episode 2798: winner = 1, steps = 6\n",
      "04:12:58 [INFO] train episode 2799: winner = 1, steps = 6\n",
      "04:13:40 [INFO] train episode 2800: winner = 1, steps = 8\n",
      "04:14:10 [INFO] train episode 2801: winner = 1, steps = 6\n",
      "04:14:31 [INFO] train episode 2802: winner = 1, steps = 6\n",
      "04:14:58 [INFO] train episode 2803: winner = 0, steps = 8\n",
      "04:14:58 [INFO] train episode 2804: winner = 1, steps = 6\n",
      "04:15:15 [INFO] train episode 2805: winner = 1, steps = 6\n",
      "04:15:29 [INFO] train episode 2806: winner = 1, steps = 4\n",
      "04:15:35 [INFO] train episode 2807: winner = -1, steps = 5\n",
      "04:15:38 [INFO] train episode 2808: winner = 1, steps = 4\n",
      "04:15:53 [INFO] train episode 2809: winner = 1, steps = 4\n",
      "04:15:54 [INFO] train episode 2810: winner = -1, steps = 5\n",
      "04:16:09 [INFO] train episode 2811: winner = -1, steps = 7\n",
      "04:16:21 [INFO] train episode 2812: winner = 0, steps = 8\n",
      "04:16:26 [INFO] train episode 2813: winner = 1, steps = 8\n",
      "04:16:29 [INFO] train episode 2814: winner = 1, steps = 6\n",
      "04:16:50 [INFO] train episode 2815: winner = 0, steps = 8\n",
      "04:16:55 [INFO] train episode 2816: winner = -1, steps = 7\n",
      "04:17:07 [INFO] train episode 2817: winner = 0, steps = 8\n",
      "04:17:09 [INFO] train episode 2818: winner = -1, steps = 7\n",
      "04:17:13 [INFO] train episode 2819: winner = 1, steps = 6\n",
      "04:17:15 [INFO] train episode 2820: winner = -1, steps = 5\n",
      "04:17:17 [INFO] train episode 2821: winner = -1, steps = 7\n",
      "04:17:22 [INFO] train episode 2822: winner = 1, steps = 4\n",
      "04:17:32 [INFO] train episode 2823: winner = 0, steps = 8\n",
      "04:17:40 [INFO] train episode 2824: winner = 1, steps = 6\n",
      "04:17:46 [INFO] train episode 2825: winner = 0, steps = 8\n",
      "04:17:48 [INFO] train episode 2826: winner = -1, steps = 5\n",
      "04:17:49 [INFO] train episode 2827: winner = 1, steps = 6\n",
      "04:17:54 [INFO] train episode 2828: winner = 0, steps = 8\n",
      "04:18:10 [INFO] train episode 2829: winner = -1, steps = 7\n",
      "04:18:17 [INFO] train episode 2830: winner = 0, steps = 8\n",
      "04:18:28 [INFO] train episode 2831: winner = 0, steps = 8\n",
      "04:18:37 [INFO] train episode 2832: winner = 0, steps = 8\n",
      "04:18:42 [INFO] train episode 2833: winner = 1, steps = 6\n",
      "04:18:42 [INFO] train episode 2834: winner = 1, steps = 6\n",
      "04:18:44 [INFO] train episode 2835: winner = 1, steps = 4\n",
      "04:18:46 [INFO] train episode 2836: winner = 1, steps = 6\n",
      "04:18:48 [INFO] train episode 2837: winner = 0, steps = 8\n",
      "04:18:51 [INFO] train episode 2838: winner = -1, steps = 7\n",
      "04:18:51 [INFO] train episode 2839: winner = 1, steps = 4\n",
      "04:18:55 [INFO] train episode 2840: winner = 1, steps = 4\n",
      "04:18:56 [INFO] train episode 2841: winner = 1, steps = 4\n",
      "04:18:56 [INFO] train episode 2842: winner = 0, steps = 8\n",
      "04:18:57 [INFO] train episode 2843: winner = 1, steps = 4\n",
      "04:18:58 [INFO] train episode 2844: winner = 1, steps = 8\n",
      "04:19:00 [INFO] train episode 2845: winner = 1, steps = 4\n",
      "04:19:00 [INFO] train episode 2846: winner = -1, steps = 5\n",
      "04:19:01 [INFO] train episode 2847: winner = 1, steps = 4\n",
      "04:19:02 [INFO] train episode 2848: winner = 1, steps = 6\n",
      "04:19:03 [INFO] train episode 2849: winner = 1, steps = 4\n",
      "04:19:06 [INFO] train episode 2850: winner = 1, steps = 4\n",
      "04:19:06 [INFO] train episode 2851: winner = 1, steps = 4\n",
      "04:19:09 [INFO] train episode 2852: winner = 0, steps = 8\n",
      "04:19:11 [INFO] train episode 2853: winner = -1, steps = 7\n",
      "04:19:11 [INFO] train episode 2854: winner = 1, steps = 4\n",
      "04:19:13 [INFO] train episode 2855: winner = 0, steps = 8\n",
      "04:19:13 [INFO] train episode 2856: winner = 1, steps = 6\n",
      "04:19:24 [INFO] train episode 2857: winner = 0, steps = 8\n",
      "04:19:25 [INFO] train episode 2858: winner = 1, steps = 4\n",
      "04:19:25 [INFO] train episode 2859: winner = 1, steps = 6\n",
      "04:19:28 [INFO] train episode 2860: winner = 1, steps = 6\n",
      "04:19:30 [INFO] train episode 2861: winner = -1, steps = 7\n",
      "04:19:31 [INFO] train episode 2862: winner = 1, steps = 6\n",
      "04:19:34 [INFO] train episode 2863: winner = 0, steps = 8\n",
      "04:19:34 [INFO] train episode 2864: winner = 0, steps = 8\n",
      "04:19:36 [INFO] train episode 2865: winner = -1, steps = 7\n",
      "04:19:37 [INFO] train episode 2866: winner = 0, steps = 8\n",
      "04:19:40 [INFO] train episode 2867: winner = 1, steps = 4\n",
      "04:19:50 [INFO] train episode 2868: winner = 1, steps = 8\n",
      "04:19:50 [INFO] train episode 2869: winner = 1, steps = 4\n",
      "04:19:55 [INFO] train episode 2870: winner = -1, steps = 7\n",
      "04:19:56 [INFO] train episode 2871: winner = 0, steps = 8\n",
      "04:19:58 [INFO] train episode 2872: winner = 1, steps = 4\n",
      "04:19:59 [INFO] train episode 2873: winner = 0, steps = 8\n",
      "04:20:03 [INFO] train episode 2874: winner = 1, steps = 6\n",
      "04:20:06 [INFO] train episode 2875: winner = 0, steps = 8\n",
      "04:20:08 [INFO] train episode 2876: winner = 0, steps = 8\n",
      "04:20:12 [INFO] train episode 2877: winner = 0, steps = 8\n",
      "04:20:15 [INFO] train episode 2878: winner = 1, steps = 6\n",
      "04:20:16 [INFO] train episode 2879: winner = 0, steps = 8\n",
      "04:20:16 [INFO] train episode 2880: winner = 1, steps = 4\n",
      "04:20:18 [INFO] train episode 2881: winner = -1, steps = 7\n",
      "04:20:18 [INFO] train episode 2882: winner = 0, steps = 8\n",
      "04:20:18 [INFO] train episode 2883: winner = -1, steps = 7\n",
      "04:20:21 [INFO] train episode 2884: winner = 0, steps = 8\n",
      "04:20:24 [INFO] train episode 2885: winner = 1, steps = 6\n",
      "04:20:24 [INFO] train episode 2886: winner = 1, steps = 4\n",
      "04:20:26 [INFO] train episode 2887: winner = 1, steps = 6\n",
      "04:20:31 [INFO] train episode 2888: winner = -1, steps = 7\n",
      "04:20:33 [INFO] train episode 2889: winner = 1, steps = 4\n",
      "04:20:33 [INFO] train episode 2890: winner = 0, steps = 8\n",
      "04:20:34 [INFO] train episode 2891: winner = 1, steps = 6\n",
      "04:20:36 [INFO] train episode 2892: winner = 1, steps = 6\n",
      "04:20:37 [INFO] train episode 2893: winner = 1, steps = 4\n",
      "04:20:37 [INFO] train episode 2894: winner = 1, steps = 6\n",
      "04:20:43 [INFO] train episode 2895: winner = 1, steps = 8\n",
      "04:20:49 [INFO] train episode 2896: winner = -1, steps = 7\n",
      "04:20:52 [INFO] train episode 2897: winner = 0, steps = 8\n",
      "04:20:56 [INFO] train episode 2898: winner = -1, steps = 5\n",
      "04:21:00 [INFO] train episode 2899: winner = 1, steps = 6\n",
      "04:21:01 [INFO] train episode 2900: winner = 0, steps = 8\n",
      "04:21:02 [INFO] train episode 2901: winner = 0, steps = 8\n",
      "04:21:05 [INFO] train episode 2902: winner = 1, steps = 6\n",
      "04:21:06 [INFO] train episode 2903: winner = 1, steps = 6\n",
      "04:21:08 [INFO] train episode 2904: winner = -1, steps = 5\n",
      "04:21:09 [INFO] train episode 2905: winner = 1, steps = 4\n",
      "04:21:09 [INFO] train episode 2906: winner = 1, steps = 4\n",
      "04:21:10 [INFO] train episode 2907: winner = -1, steps = 7\n",
      "04:21:12 [INFO] train episode 2908: winner = 0, steps = 8\n",
      "04:21:12 [INFO] train episode 2909: winner = 0, steps = 8\n",
      "04:21:14 [INFO] train episode 2910: winner = 0, steps = 8\n",
      "04:21:22 [INFO] train episode 2911: winner = 1, steps = 4\n",
      "04:21:23 [INFO] train episode 2912: winner = 1, steps = 6\n",
      "04:21:25 [INFO] train episode 2913: winner = 1, steps = 6\n",
      "04:21:26 [INFO] train episode 2914: winner = 1, steps = 6\n",
      "04:21:26 [INFO] train episode 2915: winner = 0, steps = 8\n",
      "04:21:26 [INFO] train episode 2916: winner = 1, steps = 6\n",
      "04:21:26 [INFO] train episode 2917: winner = 1, steps = 6\n",
      "04:21:29 [INFO] train episode 2918: winner = 0, steps = 8\n",
      "04:21:30 [INFO] train episode 2919: winner = -1, steps = 7\n",
      "04:21:32 [INFO] train episode 2920: winner = 1, steps = 6\n",
      "04:21:32 [INFO] train episode 2921: winner = 1, steps = 6\n",
      "04:21:32 [INFO] train episode 2922: winner = 1, steps = 6\n",
      "04:21:33 [INFO] train episode 2923: winner = 1, steps = 4\n",
      "04:21:33 [INFO] train episode 2924: winner = 1, steps = 4\n",
      "04:21:33 [INFO] train episode 2925: winner = 1, steps = 6\n",
      "04:21:34 [INFO] train episode 2926: winner = 0, steps = 8\n",
      "04:21:34 [INFO] train episode 2927: winner = 1, steps = 6\n",
      "04:21:34 [INFO] train episode 2928: winner = 0, steps = 8\n",
      "04:21:37 [INFO] train episode 2929: winner = 1, steps = 6\n",
      "04:21:38 [INFO] train episode 2930: winner = 1, steps = 4\n",
      "04:21:38 [INFO] train episode 2931: winner = 1, steps = 4\n",
      "04:21:41 [INFO] train episode 2932: winner = 1, steps = 6\n",
      "04:21:44 [INFO] train episode 2933: winner = 1, steps = 6\n",
      "04:21:46 [INFO] train episode 2934: winner = 1, steps = 6\n",
      "04:21:47 [INFO] train episode 2935: winner = 1, steps = 4\n",
      "04:21:47 [INFO] test episode 2935:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:22:08 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "04:22:27 [INFO] step 1：player -1, action (2, 2)\n",
      "+++\n",
      "o++\n",
      "++x\n",
      "04:22:39 [INFO] step 2：player 1, action (0, 0)\n",
      "o++\n",
      "o++\n",
      "++x\n",
      "04:22:42 [INFO] step 3：player -1, action (2, 0)\n",
      "o++\n",
      "o++\n",
      "x+x\n",
      "04:22:42 [INFO] step 4：player 1, action (1, 2)\n",
      "o++\n",
      "o+o\n",
      "x+x\n",
      "04:22:42 [INFO] step 5：player -1, action (2, 1)\n",
      "o++\n",
      "o+o\n",
      "xxx\n",
      "04:22:42 [INFO] test episode 2935: winner = -1, steps = 5\n",
      "04:23:19 [INFO] train episode 2936: winner = 1, steps = 4\n",
      "04:23:49 [INFO] train episode 2937: winner = 1, steps = 4\n",
      "04:24:22 [INFO] train episode 2938: winner = 0, steps = 8\n",
      "04:24:51 [INFO] train episode 2939: winner = -1, steps = 7\n",
      "04:25:14 [INFO] train episode 2940: winner = 1, steps = 6\n",
      "04:25:19 [INFO] train episode 2941: winner = 1, steps = 6\n",
      "04:25:21 [INFO] train episode 2942: winner = 1, steps = 6\n",
      "04:25:35 [INFO] train episode 2943: winner = 1, steps = 6\n",
      "04:25:53 [INFO] train episode 2944: winner = 0, steps = 8\n",
      "04:26:00 [INFO] train episode 2945: winner = 1, steps = 4\n",
      "04:26:16 [INFO] train episode 2946: winner = 1, steps = 4\n",
      "04:26:21 [INFO] train episode 2947: winner = 0, steps = 8\n",
      "04:26:32 [INFO] train episode 2948: winner = 1, steps = 4\n",
      "04:26:37 [INFO] train episode 2949: winner = -1, steps = 7\n",
      "04:26:50 [INFO] train episode 2950: winner = -1, steps = 5\n",
      "04:26:51 [INFO] train episode 2951: winner = 1, steps = 4\n",
      "04:26:52 [INFO] train episode 2952: winner = 1, steps = 6\n",
      "04:26:53 [INFO] train episode 2953: winner = 0, steps = 8\n",
      "04:27:00 [INFO] train episode 2954: winner = 1, steps = 4\n",
      "04:27:27 [INFO] train episode 2955: winner = 1, steps = 4\n",
      "04:27:33 [INFO] train episode 2956: winner = -1, steps = 7\n",
      "04:27:34 [INFO] train episode 2957: winner = 0, steps = 8\n",
      "04:27:35 [INFO] train episode 2958: winner = 1, steps = 6\n",
      "04:27:40 [INFO] train episode 2959: winner = 1, steps = 6\n",
      "04:27:41 [INFO] train episode 2960: winner = 1, steps = 6\n",
      "04:28:04 [INFO] train episode 2961: winner = -1, steps = 5\n",
      "04:28:13 [INFO] train episode 2962: winner = 1, steps = 6\n",
      "04:28:15 [INFO] train episode 2963: winner = 0, steps = 8\n",
      "04:28:34 [INFO] train episode 2964: winner = 0, steps = 8\n",
      "04:28:37 [INFO] train episode 2965: winner = 1, steps = 4\n",
      "04:28:38 [INFO] train episode 2966: winner = 1, steps = 4\n",
      "04:28:38 [INFO] train episode 2967: winner = 1, steps = 4\n",
      "04:28:42 [INFO] train episode 2968: winner = 1, steps = 4\n",
      "04:28:42 [INFO] train episode 2969: winner = 1, steps = 6\n",
      "04:28:44 [INFO] train episode 2970: winner = 1, steps = 4\n",
      "04:28:44 [INFO] train episode 2971: winner = 1, steps = 4\n",
      "04:28:53 [INFO] train episode 2972: winner = 0, steps = 8\n",
      "04:28:54 [INFO] train episode 2973: winner = 1, steps = 6\n",
      "04:28:59 [INFO] train episode 2974: winner = 0, steps = 8\n",
      "04:28:59 [INFO] train episode 2975: winner = 1, steps = 4\n",
      "04:29:01 [INFO] train episode 2976: winner = 1, steps = 6\n",
      "04:29:10 [INFO] train episode 2977: winner = 1, steps = 6\n",
      "04:29:15 [INFO] train episode 2978: winner = 0, steps = 8\n",
      "04:29:20 [INFO] train episode 2979: winner = 0, steps = 8\n",
      "04:29:22 [INFO] train episode 2980: winner = 1, steps = 4\n",
      "04:29:28 [INFO] train episode 2981: winner = 0, steps = 8\n",
      "04:29:32 [INFO] train episode 2982: winner = 1, steps = 4\n",
      "04:29:36 [INFO] train episode 2983: winner = 0, steps = 8\n",
      "04:29:38 [INFO] train episode 2984: winner = 1, steps = 4\n",
      "04:29:39 [INFO] train episode 2985: winner = -1, steps = 7\n",
      "04:29:39 [INFO] train episode 2986: winner = 1, steps = 4\n",
      "04:29:40 [INFO] train episode 2987: winner = -1, steps = 5\n",
      "04:29:46 [INFO] train episode 2988: winner = 0, steps = 8\n",
      "04:29:48 [INFO] train episode 2989: winner = 1, steps = 4\n",
      "04:29:50 [INFO] train episode 2990: winner = 0, steps = 8\n",
      "04:29:53 [INFO] train episode 2991: winner = 1, steps = 6\n",
      "04:29:57 [INFO] train episode 2992: winner = 1, steps = 6\n",
      "04:29:57 [INFO] train episode 2993: winner = 1, steps = 4\n",
      "04:30:09 [INFO] train episode 2994: winner = 0, steps = 8\n",
      "04:30:12 [INFO] train episode 2995: winner = 0, steps = 8\n",
      "04:30:13 [INFO] train episode 2996: winner = 1, steps = 4\n",
      "04:30:21 [INFO] train episode 2997: winner = 0, steps = 8\n",
      "04:30:23 [INFO] train episode 2998: winner = 0, steps = 8\n",
      "04:30:23 [INFO] train episode 2999: winner = 1, steps = 6\n",
      "04:30:26 [INFO] train episode 3000: winner = 1, steps = 4\n",
      "04:30:26 [INFO] train episode 3001: winner = -1, steps = 7\n",
      "04:30:29 [INFO] train episode 3002: winner = 0, steps = 8\n",
      "04:30:29 [INFO] train episode 3003: winner = 1, steps = 4\n",
      "04:30:31 [INFO] train episode 3004: winner = 1, steps = 6\n",
      "04:30:35 [INFO] train episode 3005: winner = 1, steps = 6\n",
      "04:30:39 [INFO] train episode 3006: winner = 1, steps = 6\n",
      "04:30:42 [INFO] train episode 3007: winner = -1, steps = 5\n",
      "04:30:43 [INFO] train episode 3008: winner = 1, steps = 6\n",
      "04:30:43 [INFO] train episode 3009: winner = 1, steps = 4\n",
      "04:30:43 [INFO] train episode 3010: winner = 1, steps = 4\n",
      "04:30:46 [INFO] train episode 3011: winner = 1, steps = 6\n",
      "04:30:46 [INFO] train episode 3012: winner = 1, steps = 6\n",
      "04:30:56 [INFO] train episode 3013: winner = 0, steps = 8\n",
      "04:30:56 [INFO] train episode 3014: winner = 0, steps = 8\n",
      "04:30:57 [INFO] train episode 3015: winner = 1, steps = 4\n",
      "04:30:59 [INFO] train episode 3016: winner = 1, steps = 4\n",
      "04:31:05 [INFO] train episode 3017: winner = 0, steps = 8\n",
      "04:31:09 [INFO] train episode 3018: winner = 0, steps = 8\n",
      "04:31:10 [INFO] train episode 3019: winner = 1, steps = 6\n",
      "04:31:12 [INFO] train episode 3020: winner = 0, steps = 8\n",
      "04:31:13 [INFO] train episode 3021: winner = 1, steps = 4\n",
      "04:31:17 [INFO] train episode 3022: winner = 0, steps = 8\n",
      "04:31:23 [INFO] train episode 3023: winner = 0, steps = 8\n",
      "04:31:24 [INFO] train episode 3024: winner = 1, steps = 6\n",
      "04:31:26 [INFO] train episode 3025: winner = -1, steps = 7\n",
      "04:31:28 [INFO] train episode 3026: winner = 1, steps = 4\n",
      "04:31:28 [INFO] train episode 3027: winner = -1, steps = 7\n",
      "04:31:28 [INFO] train episode 3028: winner = 0, steps = 8\n",
      "04:31:31 [INFO] train episode 3029: winner = 1, steps = 6\n",
      "04:31:32 [INFO] train episode 3030: winner = -1, steps = 5\n",
      "04:31:35 [INFO] train episode 3031: winner = 1, steps = 6\n",
      "04:31:35 [INFO] train episode 3032: winner = 0, steps = 8\n",
      "04:31:38 [INFO] train episode 3033: winner = 1, steps = 6\n",
      "04:31:40 [INFO] train episode 3034: winner = 1, steps = 6\n",
      "04:31:42 [INFO] train episode 3035: winner = 0, steps = 8\n",
      "04:31:43 [INFO] train episode 3036: winner = -1, steps = 5\n",
      "04:31:43 [INFO] train episode 3037: winner = 0, steps = 8\n",
      "04:31:45 [INFO] train episode 3038: winner = -1, steps = 5\n",
      "04:31:45 [INFO] train episode 3039: winner = 1, steps = 4\n",
      "04:31:45 [INFO] train episode 3040: winner = 1, steps = 6\n",
      "04:31:48 [INFO] train episode 3041: winner = 1, steps = 6\n",
      "04:31:49 [INFO] train episode 3042: winner = 1, steps = 6\n",
      "04:31:54 [INFO] train episode 3043: winner = 0, steps = 8\n",
      "04:31:55 [INFO] train episode 3044: winner = 1, steps = 4\n",
      "04:31:58 [INFO] train episode 3045: winner = 0, steps = 8\n",
      "04:31:59 [INFO] train episode 3046: winner = 0, steps = 8\n",
      "04:32:00 [INFO] train episode 3047: winner = 1, steps = 4\n",
      "04:32:00 [INFO] train episode 3048: winner = 0, steps = 8\n",
      "04:32:00 [INFO] train episode 3049: winner = 0, steps = 8\n",
      "04:32:03 [INFO] train episode 3050: winner = 1, steps = 4\n",
      "04:32:03 [INFO] train episode 3051: winner = -1, steps = 7\n",
      "04:32:04 [INFO] train episode 3052: winner = -1, steps = 5\n",
      "04:32:07 [INFO] train episode 3053: winner = 0, steps = 8\n",
      "04:32:07 [INFO] train episode 3054: winner = 1, steps = 6\n",
      "04:32:08 [INFO] train episode 3055: winner = -1, steps = 7\n",
      "04:32:08 [INFO] train episode 3056: winner = 0, steps = 8\n",
      "04:32:08 [INFO] train episode 3057: winner = 1, steps = 4\n",
      "04:32:08 [INFO] train episode 3058: winner = 0, steps = 8\n",
      "04:32:10 [INFO] train episode 3059: winner = 0, steps = 8\n",
      "04:32:11 [INFO] train episode 3060: winner = 0, steps = 8\n",
      "04:32:11 [INFO] train episode 3061: winner = 1, steps = 4\n",
      "04:32:14 [INFO] train episode 3062: winner = 1, steps = 8\n",
      "04:32:17 [INFO] train episode 3063: winner = -1, steps = 7\n",
      "04:32:17 [INFO] train episode 3064: winner = 1, steps = 4\n",
      "04:32:20 [INFO] train episode 3065: winner = 1, steps = 8\n",
      "04:32:20 [INFO] train episode 3066: winner = 1, steps = 4\n",
      "04:32:23 [INFO] train episode 3067: winner = 0, steps = 8\n",
      "04:32:24 [INFO] train episode 3068: winner = 1, steps = 6\n",
      "04:32:27 [INFO] train episode 3069: winner = 0, steps = 8\n",
      "04:32:28 [INFO] train episode 3070: winner = 1, steps = 6\n",
      "04:32:28 [INFO] train episode 3071: winner = 1, steps = 4\n",
      "04:32:28 [INFO] train episode 3072: winner = 1, steps = 6\n",
      "04:32:31 [INFO] train episode 3073: winner = 1, steps = 6\n",
      "04:32:32 [INFO] train episode 3074: winner = 0, steps = 8\n",
      "04:32:32 [INFO] train episode 3075: winner = 1, steps = 6\n",
      "04:32:34 [INFO] train episode 3076: winner = -1, steps = 5\n",
      "04:32:34 [INFO] test episode 3076:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:32:56 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "04:33:14 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "o++\n",
      "x++\n",
      "04:33:31 [INFO] step 2：player 1, action (0, 2)\n",
      "++o\n",
      "o++\n",
      "x++\n",
      "04:33:43 [INFO] step 3：player -1, action (0, 0)\n",
      "x+o\n",
      "o++\n",
      "x++\n",
      "04:33:45 [INFO] step 4：player 1, action (1, 2)\n",
      "x+o\n",
      "o+o\n",
      "x++\n",
      "04:33:45 [INFO] step 5：player -1, action (2, 2)\n",
      "x+o\n",
      "o+o\n",
      "x+x\n",
      "04:33:45 [INFO] step 6：player 1, action (1, 1)\n",
      "x+o\n",
      "ooo\n",
      "x+x\n",
      "04:33:45 [INFO] test episode 3076: winner = 1, steps = 6\n",
      "04:34:19 [INFO] train episode 3077: winner = 0, steps = 8\n",
      "04:34:48 [INFO] train episode 3078: winner = 1, steps = 6\n",
      "04:35:01 [INFO] train episode 3079: winner = -1, steps = 7\n",
      "04:35:28 [INFO] train episode 3080: winner = 1, steps = 4\n",
      "04:35:55 [INFO] train episode 3081: winner = -1, steps = 5\n",
      "04:36:00 [INFO] train episode 3082: winner = 1, steps = 6\n",
      "04:36:17 [INFO] train episode 3083: winner = 1, steps = 4\n",
      "04:36:32 [INFO] train episode 3084: winner = 0, steps = 8\n",
      "04:36:43 [INFO] train episode 3085: winner = 1, steps = 6\n",
      "04:36:48 [INFO] train episode 3086: winner = 0, steps = 8\n",
      "04:37:16 [INFO] train episode 3087: winner = 1, steps = 6\n",
      "04:37:56 [INFO] train episode 3088: winner = 1, steps = 8\n",
      "04:38:01 [INFO] train episode 3089: winner = 1, steps = 4\n",
      "04:38:19 [INFO] train episode 3090: winner = 1, steps = 6\n",
      "04:38:27 [INFO] train episode 3091: winner = -1, steps = 5\n",
      "04:38:28 [INFO] train episode 3092: winner = 1, steps = 6\n",
      "04:38:32 [INFO] train episode 3093: winner = 1, steps = 4\n",
      "04:38:37 [INFO] train episode 3094: winner = 1, steps = 8\n",
      "04:38:50 [INFO] train episode 3095: winner = 0, steps = 8\n",
      "04:38:55 [INFO] train episode 3096: winner = 1, steps = 4\n",
      "04:39:01 [INFO] train episode 3097: winner = 1, steps = 4\n",
      "04:39:13 [INFO] train episode 3098: winner = -1, steps = 5\n",
      "04:39:17 [INFO] train episode 3099: winner = 0, steps = 8\n",
      "04:39:19 [INFO] train episode 3100: winner = 1, steps = 6\n",
      "04:39:20 [INFO] train episode 3101: winner = 1, steps = 8\n",
      "04:39:28 [INFO] train episode 3102: winner = 0, steps = 8\n",
      "04:39:30 [INFO] train episode 3103: winner = 1, steps = 4\n",
      "04:39:32 [INFO] train episode 3104: winner = 0, steps = 8\n",
      "04:39:33 [INFO] train episode 3105: winner = 0, steps = 8\n",
      "04:39:42 [INFO] train episode 3106: winner = 0, steps = 8\n",
      "04:39:52 [INFO] train episode 3107: winner = 1, steps = 6\n",
      "04:39:53 [INFO] train episode 3108: winner = 1, steps = 4\n",
      "04:40:00 [INFO] train episode 3109: winner = 0, steps = 8\n",
      "04:40:00 [INFO] train episode 3110: winner = 1, steps = 6\n",
      "04:40:08 [INFO] train episode 3111: winner = 0, steps = 8\n",
      "04:40:10 [INFO] train episode 3112: winner = 1, steps = 6\n",
      "04:40:15 [INFO] train episode 3113: winner = 1, steps = 4\n",
      "04:40:16 [INFO] train episode 3114: winner = 1, steps = 4\n",
      "04:40:18 [INFO] train episode 3115: winner = -1, steps = 7\n",
      "04:40:22 [INFO] train episode 3116: winner = 1, steps = 4\n",
      "04:40:30 [INFO] train episode 3117: winner = -1, steps = 7\n",
      "04:40:33 [INFO] train episode 3118: winner = 0, steps = 8\n",
      "04:40:37 [INFO] train episode 3119: winner = -1, steps = 5\n",
      "04:40:38 [INFO] train episode 3120: winner = 1, steps = 4\n",
      "04:40:41 [INFO] train episode 3121: winner = 1, steps = 6\n",
      "04:40:41 [INFO] train episode 3122: winner = 1, steps = 6\n",
      "04:40:44 [INFO] train episode 3123: winner = -1, steps = 7\n",
      "04:40:44 [INFO] train episode 3124: winner = 1, steps = 6\n",
      "04:40:46 [INFO] train episode 3125: winner = 1, steps = 8\n",
      "04:40:52 [INFO] train episode 3126: winner = 0, steps = 8\n",
      "04:40:58 [INFO] train episode 3127: winner = 1, steps = 6\n",
      "04:41:04 [INFO] train episode 3128: winner = 1, steps = 8\n",
      "04:41:04 [INFO] train episode 3129: winner = -1, steps = 5\n",
      "04:41:09 [INFO] train episode 3130: winner = 0, steps = 8\n",
      "04:41:09 [INFO] train episode 3131: winner = 1, steps = 6\n",
      "04:41:09 [INFO] train episode 3132: winner = 0, steps = 8\n",
      "04:41:11 [INFO] train episode 3133: winner = -1, steps = 7\n",
      "04:41:12 [INFO] train episode 3134: winner = 0, steps = 8\n",
      "04:41:13 [INFO] train episode 3135: winner = 1, steps = 4\n",
      "04:41:13 [INFO] train episode 3136: winner = 1, steps = 4\n",
      "04:41:16 [INFO] train episode 3137: winner = -1, steps = 5\n",
      "04:41:18 [INFO] train episode 3138: winner = 1, steps = 4\n",
      "04:41:21 [INFO] train episode 3139: winner = 1, steps = 6\n",
      "04:41:23 [INFO] train episode 3140: winner = 1, steps = 6\n",
      "04:41:26 [INFO] train episode 3141: winner = 1, steps = 4\n",
      "04:41:27 [INFO] train episode 3142: winner = 1, steps = 4\n",
      "04:41:28 [INFO] train episode 3143: winner = 0, steps = 8\n",
      "04:41:29 [INFO] train episode 3144: winner = 1, steps = 4\n",
      "04:41:35 [INFO] train episode 3145: winner = -1, steps = 5\n",
      "04:41:35 [INFO] train episode 3146: winner = -1, steps = 7\n",
      "04:41:39 [INFO] train episode 3147: winner = 1, steps = 8\n",
      "04:41:42 [INFO] train episode 3148: winner = 1, steps = 6\n",
      "04:41:46 [INFO] train episode 3149: winner = 0, steps = 8\n",
      "04:41:47 [INFO] train episode 3150: winner = 0, steps = 8\n",
      "04:41:48 [INFO] train episode 3151: winner = 1, steps = 6\n",
      "04:41:51 [INFO] train episode 3152: winner = 1, steps = 6\n",
      "04:41:52 [INFO] train episode 3153: winner = 1, steps = 6\n",
      "04:41:52 [INFO] train episode 3154: winner = -1, steps = 5\n",
      "04:41:56 [INFO] train episode 3155: winner = 1, steps = 6\n",
      "04:41:59 [INFO] train episode 3156: winner = 1, steps = 6\n",
      "04:41:59 [INFO] train episode 3157: winner = 1, steps = 4\n",
      "04:41:59 [INFO] train episode 3158: winner = 0, steps = 8\n",
      "04:42:00 [INFO] train episode 3159: winner = -1, steps = 7\n",
      "04:42:02 [INFO] train episode 3160: winner = -1, steps = 5\n",
      "04:42:05 [INFO] train episode 3161: winner = 0, steps = 8\n",
      "04:42:05 [INFO] train episode 3162: winner = 1, steps = 4\n",
      "04:42:08 [INFO] train episode 3163: winner = 1, steps = 6\n",
      "04:42:08 [INFO] train episode 3164: winner = 1, steps = 6\n",
      "04:42:15 [INFO] train episode 3165: winner = 0, steps = 8\n",
      "04:42:16 [INFO] train episode 3166: winner = 1, steps = 6\n",
      "04:42:18 [INFO] train episode 3167: winner = 1, steps = 4\n",
      "04:42:19 [INFO] train episode 3168: winner = 1, steps = 4\n",
      "04:42:20 [INFO] train episode 3169: winner = 1, steps = 4\n",
      "04:42:27 [INFO] train episode 3170: winner = 0, steps = 8\n",
      "04:42:27 [INFO] train episode 3171: winner = 1, steps = 4\n",
      "04:42:28 [INFO] train episode 3172: winner = 1, steps = 6\n",
      "04:42:29 [INFO] train episode 3173: winner = 1, steps = 4\n",
      "04:42:30 [INFO] train episode 3174: winner = 0, steps = 8\n",
      "04:42:30 [INFO] train episode 3175: winner = 1, steps = 6\n",
      "04:42:30 [INFO] train episode 3176: winner = 1, steps = 6\n",
      "04:42:32 [INFO] train episode 3177: winner = -1, steps = 7\n",
      "04:42:33 [INFO] train episode 3178: winner = 1, steps = 4\n",
      "04:42:34 [INFO] train episode 3179: winner = -1, steps = 7\n",
      "04:42:37 [INFO] train episode 3180: winner = -1, steps = 7\n",
      "04:42:38 [INFO] train episode 3181: winner = -1, steps = 5\n",
      "04:42:38 [INFO] train episode 3182: winner = 1, steps = 6\n",
      "04:42:39 [INFO] train episode 3183: winner = 1, steps = 4\n",
      "04:42:40 [INFO] train episode 3184: winner = 0, steps = 8\n",
      "04:42:44 [INFO] train episode 3185: winner = 0, steps = 8\n",
      "04:42:45 [INFO] train episode 3186: winner = 1, steps = 6\n",
      "04:42:47 [INFO] train episode 3187: winner = 1, steps = 4\n",
      "04:42:48 [INFO] train episode 3188: winner = 1, steps = 4\n",
      "04:42:48 [INFO] train episode 3189: winner = 1, steps = 4\n",
      "04:42:49 [INFO] train episode 3190: winner = -1, steps = 7\n",
      "04:42:49 [INFO] train episode 3191: winner = 0, steps = 8\n",
      "04:42:50 [INFO] train episode 3192: winner = 0, steps = 8\n",
      "04:42:52 [INFO] train episode 3193: winner = 1, steps = 6\n",
      "04:42:54 [INFO] train episode 3194: winner = 1, steps = 6\n",
      "04:42:54 [INFO] train episode 3195: winner = -1, steps = 5\n",
      "04:42:58 [INFO] train episode 3196: winner = 0, steps = 8\n",
      "04:42:59 [INFO] train episode 3197: winner = 1, steps = 4\n",
      "04:42:59 [INFO] train episode 3198: winner = 1, steps = 6\n",
      "04:43:00 [INFO] train episode 3199: winner = 1, steps = 4\n",
      "04:43:02 [INFO] train episode 3200: winner = 1, steps = 6\n",
      "04:43:02 [INFO] train episode 3201: winner = -1, steps = 7\n",
      "04:43:03 [INFO] train episode 3202: winner = 1, steps = 6\n",
      "04:43:04 [INFO] train episode 3203: winner = 1, steps = 4\n",
      "04:43:06 [INFO] train episode 3204: winner = 0, steps = 8\n",
      "04:43:06 [INFO] train episode 3205: winner = 1, steps = 6\n",
      "04:43:07 [INFO] train episode 3206: winner = 1, steps = 6\n",
      "04:43:07 [INFO] train episode 3207: winner = 1, steps = 6\n",
      "04:43:08 [INFO] train episode 3208: winner = 0, steps = 8\n",
      "04:43:08 [INFO] train episode 3209: winner = 1, steps = 4\n",
      "04:43:09 [INFO] train episode 3210: winner = -1, steps = 5\n",
      "04:43:09 [INFO] train episode 3211: winner = 1, steps = 4\n",
      "04:43:11 [INFO] train episode 3212: winner = 1, steps = 4\n",
      "04:43:11 [INFO] train episode 3213: winner = 1, steps = 6\n",
      "04:43:12 [INFO] train episode 3214: winner = 1, steps = 6\n",
      "04:43:13 [INFO] train episode 3215: winner = 1, steps = 6\n",
      "04:43:14 [INFO] train episode 3216: winner = 1, steps = 4\n",
      "04:43:14 [INFO] train episode 3217: winner = 1, steps = 4\n",
      "04:43:14 [INFO] train episode 3218: winner = 1, steps = 6\n",
      "04:43:14 [INFO] train episode 3219: winner = 1, steps = 4\n",
      "04:43:15 [INFO] train episode 3220: winner = -1, steps = 5\n",
      "04:43:15 [INFO] test episode 3220:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:43:37 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "04:43:56 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "++o\n",
      "+++\n",
      "04:44:08 [INFO] step 2：player 1, action (2, 2)\n",
      "+x+\n",
      "++o\n",
      "++o\n",
      "04:44:11 [INFO] step 3：player -1, action (0, 2)\n",
      "+xx\n",
      "++o\n",
      "++o\n",
      "04:44:11 [INFO] step 4：player 1, action (2, 0)\n",
      "+xx\n",
      "++o\n",
      "o+o\n",
      "04:44:11 [INFO] step 5：player -1, action (1, 1)\n",
      "+xx\n",
      "+xo\n",
      "o+o\n",
      "04:44:12 [INFO] step 6：player 1, action (2, 1)\n",
      "+xx\n",
      "+xo\n",
      "ooo\n",
      "04:44:12 [INFO] test episode 3220: winner = 1, steps = 6\n",
      "04:44:50 [INFO] train episode 3221: winner = 0, steps = 8\n",
      "04:44:53 [INFO] train episode 3222: winner = 0, steps = 8\n",
      "04:45:13 [INFO] train episode 3223: winner = 1, steps = 6\n",
      "04:45:24 [INFO] train episode 3224: winner = 1, steps = 4\n",
      "04:45:59 [INFO] train episode 3225: winner = -1, steps = 7\n",
      "04:46:30 [INFO] train episode 3226: winner = -1, steps = 7\n",
      "04:46:44 [INFO] train episode 3227: winner = 0, steps = 8\n",
      "04:47:16 [INFO] train episode 3228: winner = 1, steps = 6\n",
      "04:47:36 [INFO] train episode 3229: winner = 1, steps = 6\n",
      "04:47:48 [INFO] train episode 3230: winner = 1, steps = 6\n",
      "04:47:56 [INFO] train episode 3231: winner = 1, steps = 6\n",
      "04:47:56 [INFO] train episode 3232: winner = 1, steps = 6\n",
      "04:48:19 [INFO] train episode 3233: winner = 0, steps = 8\n",
      "04:48:20 [INFO] train episode 3234: winner = 1, steps = 6\n",
      "04:48:27 [INFO] train episode 3235: winner = 1, steps = 6\n",
      "04:48:32 [INFO] train episode 3236: winner = 1, steps = 4\n",
      "04:48:37 [INFO] train episode 3237: winner = -1, steps = 5\n",
      "04:48:55 [INFO] train episode 3238: winner = 0, steps = 8\n",
      "04:48:56 [INFO] train episode 3239: winner = 1, steps = 4\n",
      "04:49:02 [INFO] train episode 3240: winner = 0, steps = 8\n",
      "04:49:05 [INFO] train episode 3241: winner = -1, steps = 5\n",
      "04:49:09 [INFO] train episode 3242: winner = 1, steps = 4\n",
      "04:49:16 [INFO] train episode 3243: winner = 1, steps = 4\n",
      "04:49:26 [INFO] train episode 3244: winner = 0, steps = 8\n",
      "04:49:26 [INFO] train episode 3245: winner = 1, steps = 4\n",
      "04:49:38 [INFO] train episode 3246: winner = 0, steps = 8\n",
      "04:49:53 [INFO] train episode 3247: winner = 1, steps = 6\n",
      "04:49:56 [INFO] train episode 3248: winner = 1, steps = 6\n",
      "04:49:57 [INFO] train episode 3249: winner = -1, steps = 7\n",
      "04:50:02 [INFO] train episode 3250: winner = 0, steps = 8\n",
      "04:50:05 [INFO] train episode 3251: winner = 1, steps = 6\n",
      "04:50:12 [INFO] train episode 3252: winner = 1, steps = 4\n",
      "04:50:25 [INFO] train episode 3253: winner = 1, steps = 8\n",
      "04:50:33 [INFO] train episode 3254: winner = 1, steps = 4\n",
      "04:50:38 [INFO] train episode 3255: winner = 1, steps = 4\n",
      "04:50:41 [INFO] train episode 3256: winner = 1, steps = 4\n",
      "04:50:56 [INFO] train episode 3257: winner = 0, steps = 8\n",
      "04:51:03 [INFO] train episode 3258: winner = 0, steps = 8\n",
      "04:51:11 [INFO] train episode 3259: winner = 0, steps = 8\n",
      "04:51:17 [INFO] train episode 3260: winner = 0, steps = 8\n",
      "04:51:19 [INFO] train episode 3261: winner = 0, steps = 8\n",
      "04:51:25 [INFO] train episode 3262: winner = 0, steps = 8\n",
      "04:51:26 [INFO] train episode 3263: winner = 1, steps = 4\n",
      "04:51:28 [INFO] train episode 3264: winner = 0, steps = 8\n",
      "04:51:28 [INFO] train episode 3265: winner = 0, steps = 8\n",
      "04:51:32 [INFO] train episode 3266: winner = -1, steps = 7\n",
      "04:51:33 [INFO] train episode 3267: winner = 1, steps = 6\n",
      "04:51:37 [INFO] train episode 3268: winner = 1, steps = 4\n",
      "04:51:39 [INFO] train episode 3269: winner = -1, steps = 7\n",
      "04:51:40 [INFO] train episode 3270: winner = 1, steps = 4\n",
      "04:51:44 [INFO] train episode 3271: winner = 0, steps = 8\n",
      "04:51:46 [INFO] train episode 3272: winner = 0, steps = 8\n",
      "04:51:50 [INFO] train episode 3273: winner = 1, steps = 8\n",
      "04:51:52 [INFO] train episode 3274: winner = 1, steps = 6\n",
      "04:51:52 [INFO] train episode 3275: winner = 0, steps = 8\n",
      "04:51:55 [INFO] train episode 3276: winner = 1, steps = 4\n",
      "04:51:58 [INFO] train episode 3277: winner = 1, steps = 6\n",
      "04:52:00 [INFO] train episode 3278: winner = 1, steps = 4\n",
      "04:52:01 [INFO] train episode 3279: winner = 1, steps = 6\n",
      "04:52:01 [INFO] train episode 3280: winner = 1, steps = 6\n",
      "04:52:05 [INFO] train episode 3281: winner = 0, steps = 8\n",
      "04:52:05 [INFO] train episode 3282: winner = 1, steps = 4\n",
      "04:52:10 [INFO] train episode 3283: winner = -1, steps = 5\n",
      "04:52:10 [INFO] train episode 3284: winner = 1, steps = 4\n",
      "04:52:13 [INFO] train episode 3285: winner = 1, steps = 4\n",
      "04:52:17 [INFO] train episode 3286: winner = 1, steps = 6\n",
      "04:52:17 [INFO] train episode 3287: winner = 1, steps = 6\n",
      "04:52:18 [INFO] train episode 3288: winner = 1, steps = 4\n",
      "04:52:20 [INFO] train episode 3289: winner = 1, steps = 6\n",
      "04:52:21 [INFO] train episode 3290: winner = 1, steps = 4\n",
      "04:52:25 [INFO] train episode 3291: winner = 0, steps = 8\n",
      "04:52:27 [INFO] train episode 3292: winner = 1, steps = 6\n",
      "04:52:30 [INFO] train episode 3293: winner = 1, steps = 8\n",
      "04:52:34 [INFO] train episode 3294: winner = 0, steps = 8\n",
      "04:52:35 [INFO] train episode 3295: winner = 1, steps = 4\n",
      "04:52:39 [INFO] train episode 3296: winner = 0, steps = 8\n",
      "04:52:42 [INFO] train episode 3297: winner = 0, steps = 8\n",
      "04:52:43 [INFO] train episode 3298: winner = 1, steps = 4\n",
      "04:52:44 [INFO] train episode 3299: winner = 0, steps = 8\n",
      "04:52:44 [INFO] train episode 3300: winner = -1, steps = 7\n",
      "04:52:48 [INFO] train episode 3301: winner = 1, steps = 6\n",
      "04:52:48 [INFO] train episode 3302: winner = 1, steps = 4\n",
      "04:52:49 [INFO] train episode 3303: winner = 1, steps = 6\n",
      "04:52:50 [INFO] train episode 3304: winner = 1, steps = 4\n",
      "04:52:52 [INFO] train episode 3305: winner = -1, steps = 7\n",
      "04:52:53 [INFO] train episode 3306: winner = 0, steps = 8\n",
      "04:52:57 [INFO] train episode 3307: winner = 1, steps = 8\n",
      "04:52:59 [INFO] train episode 3308: winner = -1, steps = 7\n",
      "04:52:59 [INFO] train episode 3309: winner = 1, steps = 8\n",
      "04:53:01 [INFO] train episode 3310: winner = -1, steps = 7\n",
      "04:53:03 [INFO] train episode 3311: winner = 0, steps = 8\n",
      "04:53:03 [INFO] train episode 3312: winner = 0, steps = 8\n",
      "04:53:04 [INFO] train episode 3313: winner = 1, steps = 6\n",
      "04:53:06 [INFO] train episode 3314: winner = 1, steps = 4\n",
      "04:53:06 [INFO] train episode 3315: winner = 1, steps = 4\n",
      "04:53:07 [INFO] train episode 3316: winner = 1, steps = 8\n",
      "04:53:08 [INFO] train episode 3317: winner = -1, steps = 7\n",
      "04:53:08 [INFO] train episode 3318: winner = 1, steps = 4\n",
      "04:53:12 [INFO] train episode 3319: winner = 1, steps = 6\n",
      "04:53:12 [INFO] train episode 3320: winner = 1, steps = 6\n",
      "04:53:15 [INFO] train episode 3321: winner = 1, steps = 8\n",
      "04:53:16 [INFO] train episode 3322: winner = 1, steps = 6\n",
      "04:53:17 [INFO] train episode 3323: winner = 1, steps = 4\n",
      "04:53:17 [INFO] train episode 3324: winner = 0, steps = 8\n",
      "04:53:17 [INFO] train episode 3325: winner = 1, steps = 4\n",
      "04:53:17 [INFO] train episode 3326: winner = 1, steps = 4\n",
      "04:53:17 [INFO] train episode 3327: winner = 1, steps = 4\n",
      "04:53:20 [INFO] train episode 3328: winner = 0, steps = 8\n",
      "04:53:23 [INFO] train episode 3329: winner = 1, steps = 6\n",
      "04:53:23 [INFO] train episode 3330: winner = 1, steps = 6\n",
      "04:53:28 [INFO] train episode 3331: winner = -1, steps = 5\n",
      "04:53:30 [INFO] train episode 3332: winner = 1, steps = 6\n",
      "04:53:35 [INFO] train episode 3333: winner = 0, steps = 8\n",
      "04:53:35 [INFO] train episode 3334: winner = 1, steps = 4\n",
      "04:53:36 [INFO] train episode 3335: winner = 1, steps = 4\n",
      "04:53:36 [INFO] train episode 3336: winner = 1, steps = 6\n",
      "04:53:38 [INFO] train episode 3337: winner = 1, steps = 8\n",
      "04:53:40 [INFO] train episode 3338: winner = 1, steps = 6\n",
      "04:53:41 [INFO] train episode 3339: winner = 1, steps = 6\n",
      "04:53:43 [INFO] train episode 3340: winner = 0, steps = 8\n",
      "04:53:44 [INFO] train episode 3341: winner = 1, steps = 6\n",
      "04:53:44 [INFO] train episode 3342: winner = -1, steps = 5\n",
      "04:53:45 [INFO] train episode 3343: winner = 1, steps = 4\n",
      "04:53:47 [INFO] train episode 3344: winner = 1, steps = 4\n",
      "04:53:47 [INFO] train episode 3345: winner = 0, steps = 8\n",
      "04:53:49 [INFO] train episode 3346: winner = 0, steps = 8\n",
      "04:53:49 [INFO] train episode 3347: winner = 1, steps = 6\n",
      "04:53:49 [INFO] train episode 3348: winner = 1, steps = 6\n",
      "04:53:50 [INFO] train episode 3349: winner = -1, steps = 5\n",
      "04:53:50 [INFO] train episode 3350: winner = -1, steps = 7\n",
      "04:53:50 [INFO] train episode 3351: winner = 1, steps = 6\n",
      "04:53:50 [INFO] train episode 3352: winner = 1, steps = 4\n",
      "04:53:51 [INFO] train episode 3353: winner = 1, steps = 6\n",
      "04:53:51 [INFO] train episode 3354: winner = -1, steps = 5\n",
      "04:53:53 [INFO] train episode 3355: winner = 1, steps = 4\n",
      "04:53:53 [INFO] train episode 3356: winner = 1, steps = 6\n",
      "04:53:55 [INFO] train episode 3357: winner = 0, steps = 8\n",
      "04:53:55 [INFO] train episode 3358: winner = 1, steps = 6\n",
      "04:53:57 [INFO] train episode 3359: winner = 1, steps = 6\n",
      "04:53:57 [INFO] train episode 3360: winner = -1, steps = 7\n",
      "04:53:59 [INFO] train episode 3361: winner = 1, steps = 4\n",
      "04:53:59 [INFO] test episode 3361:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "04:54:21 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "04:54:39 [INFO] step 1：player -1, action (1, 1)\n",
      "+++\n",
      "+x+\n",
      "+o+\n",
      "04:54:56 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "+x+\n",
      "+o+\n",
      "04:55:02 [INFO] step 3：player -1, action (2, 2)\n",
      "+o+\n",
      "+x+\n",
      "+ox\n",
      "04:55:04 [INFO] step 4：player 1, action (1, 2)\n",
      "+o+\n",
      "+xo\n",
      "+ox\n",
      "04:55:05 [INFO] step 5：player -1, action (0, 0)\n",
      "xo+\n",
      "+xo\n",
      "+ox\n",
      "04:55:05 [INFO] test episode 3361: winner = -1, steps = 5\n",
      "04:55:41 [INFO] train episode 3362: winner = -1, steps = 7\n",
      "04:55:44 [INFO] train episode 3363: winner = 0, steps = 8\n",
      "04:56:19 [INFO] train episode 3364: winner = 1, steps = 6\n",
      "04:56:37 [INFO] train episode 3365: winner = 1, steps = 6\n",
      "04:56:48 [INFO] train episode 3366: winner = 0, steps = 8\n",
      "04:57:22 [INFO] train episode 3367: winner = 1, steps = 6\n",
      "04:57:54 [INFO] train episode 3368: winner = -1, steps = 7\n",
      "04:58:04 [INFO] train episode 3369: winner = 1, steps = 6\n",
      "04:58:16 [INFO] train episode 3370: winner = 0, steps = 8\n",
      "04:58:35 [INFO] train episode 3371: winner = 1, steps = 6\n",
      "04:58:36 [INFO] train episode 3372: winner = 1, steps = 6\n",
      "04:59:00 [INFO] train episode 3373: winner = -1, steps = 5\n",
      "04:59:08 [INFO] train episode 3374: winner = 1, steps = 4\n",
      "04:59:15 [INFO] train episode 3375: winner = 1, steps = 8\n",
      "04:59:21 [INFO] train episode 3376: winner = 1, steps = 6\n",
      "04:59:36 [INFO] train episode 3377: winner = 1, steps = 4\n",
      "04:59:42 [INFO] train episode 3378: winner = 1, steps = 6\n",
      "04:59:48 [INFO] train episode 3379: winner = -1, steps = 7\n",
      "05:00:04 [INFO] train episode 3380: winner = 1, steps = 4\n",
      "05:00:12 [INFO] train episode 3381: winner = -1, steps = 7\n",
      "05:00:21 [INFO] train episode 3382: winner = 1, steps = 4\n",
      "05:00:24 [INFO] train episode 3383: winner = 1, steps = 6\n",
      "05:00:27 [INFO] train episode 3384: winner = 1, steps = 4\n",
      "05:00:30 [INFO] train episode 3385: winner = 1, steps = 6\n",
      "05:00:35 [INFO] train episode 3386: winner = 0, steps = 8\n",
      "05:00:38 [INFO] train episode 3387: winner = 1, steps = 6\n",
      "05:00:45 [INFO] train episode 3388: winner = 0, steps = 8\n",
      "05:00:49 [INFO] train episode 3389: winner = 1, steps = 4\n",
      "05:00:51 [INFO] train episode 3390: winner = -1, steps = 7\n",
      "05:00:53 [INFO] train episode 3391: winner = -1, steps = 5\n",
      "05:01:12 [INFO] train episode 3392: winner = 0, steps = 8\n",
      "05:01:15 [INFO] train episode 3393: winner = 1, steps = 6\n",
      "05:01:21 [INFO] train episode 3394: winner = -1, steps = 7\n",
      "05:01:23 [INFO] train episode 3395: winner = -1, steps = 5\n",
      "05:01:27 [INFO] train episode 3396: winner = 1, steps = 6\n",
      "05:01:29 [INFO] train episode 3397: winner = 1, steps = 6\n",
      "05:01:32 [INFO] train episode 3398: winner = 0, steps = 8\n",
      "05:01:33 [INFO] train episode 3399: winner = -1, steps = 5\n",
      "05:01:37 [INFO] train episode 3400: winner = 1, steps = 6\n",
      "05:01:41 [INFO] train episode 3401: winner = -1, steps = 7\n",
      "05:01:42 [INFO] train episode 3402: winner = -1, steps = 5\n",
      "05:01:46 [INFO] train episode 3403: winner = -1, steps = 5\n",
      "05:01:47 [INFO] train episode 3404: winner = -1, steps = 5\n",
      "05:01:48 [INFO] train episode 3405: winner = 1, steps = 6\n",
      "05:01:49 [INFO] train episode 3406: winner = 1, steps = 6\n",
      "05:01:54 [INFO] train episode 3407: winner = 1, steps = 8\n",
      "05:01:55 [INFO] train episode 3408: winner = 1, steps = 8\n",
      "05:01:55 [INFO] train episode 3409: winner = 1, steps = 4\n",
      "05:01:56 [INFO] train episode 3410: winner = -1, steps = 7\n",
      "05:02:05 [INFO] train episode 3411: winner = 0, steps = 8\n",
      "05:02:07 [INFO] train episode 3412: winner = -1, steps = 7\n",
      "05:02:08 [INFO] train episode 3413: winner = -1, steps = 7\n",
      "05:02:17 [INFO] train episode 3414: winner = 1, steps = 6\n",
      "05:02:20 [INFO] train episode 3415: winner = 0, steps = 8\n",
      "05:02:24 [INFO] train episode 3416: winner = 0, steps = 8\n",
      "05:02:31 [INFO] train episode 3417: winner = 0, steps = 8\n",
      "05:02:35 [INFO] train episode 3418: winner = -1, steps = 5\n",
      "05:02:37 [INFO] train episode 3419: winner = -1, steps = 7\n",
      "05:02:42 [INFO] train episode 3420: winner = 1, steps = 6\n",
      "05:02:44 [INFO] train episode 3421: winner = 1, steps = 6\n",
      "05:02:45 [INFO] train episode 3422: winner = 1, steps = 8\n",
      "05:02:47 [INFO] train episode 3423: winner = 1, steps = 8\n",
      "05:02:50 [INFO] train episode 3424: winner = 1, steps = 4\n",
      "05:02:50 [INFO] train episode 3425: winner = 1, steps = 4\n",
      "05:02:53 [INFO] train episode 3426: winner = 0, steps = 8\n",
      "05:02:54 [INFO] train episode 3427: winner = 0, steps = 8\n",
      "05:02:55 [INFO] train episode 3428: winner = 1, steps = 6\n",
      "05:02:56 [INFO] train episode 3429: winner = 1, steps = 6\n",
      "05:02:56 [INFO] train episode 3430: winner = 0, steps = 8\n",
      "05:02:56 [INFO] train episode 3431: winner = 1, steps = 6\n",
      "05:02:57 [INFO] train episode 3432: winner = 1, steps = 6\n",
      "05:02:57 [INFO] train episode 3433: winner = 1, steps = 6\n",
      "05:02:59 [INFO] train episode 3434: winner = 1, steps = 4\n",
      "05:03:06 [INFO] train episode 3435: winner = 0, steps = 8\n",
      "05:03:13 [INFO] train episode 3436: winner = 0, steps = 8\n",
      "05:03:13 [INFO] train episode 3437: winner = -1, steps = 7\n",
      "05:03:15 [INFO] train episode 3438: winner = 0, steps = 8\n",
      "05:03:15 [INFO] train episode 3439: winner = 1, steps = 4\n",
      "05:03:17 [INFO] train episode 3440: winner = 1, steps = 4\n",
      "05:03:17 [INFO] train episode 3441: winner = 1, steps = 4\n",
      "05:03:17 [INFO] train episode 3442: winner = -1, steps = 7\n",
      "05:03:20 [INFO] train episode 3443: winner = 1, steps = 6\n",
      "05:03:21 [INFO] train episode 3444: winner = 0, steps = 8\n",
      "05:03:26 [INFO] train episode 3445: winner = 0, steps = 8\n",
      "05:03:27 [INFO] train episode 3446: winner = 1, steps = 8\n",
      "05:03:27 [INFO] train episode 3447: winner = 0, steps = 8\n",
      "05:03:27 [INFO] train episode 3448: winner = 1, steps = 6\n",
      "05:03:28 [INFO] train episode 3449: winner = 1, steps = 6\n",
      "05:03:28 [INFO] train episode 3450: winner = 0, steps = 8\n",
      "05:03:29 [INFO] train episode 3451: winner = 1, steps = 8\n",
      "05:03:33 [INFO] train episode 3452: winner = 1, steps = 4\n",
      "05:03:33 [INFO] train episode 3453: winner = -1, steps = 5\n",
      "05:03:34 [INFO] train episode 3454: winner = 1, steps = 4\n",
      "05:03:36 [INFO] train episode 3455: winner = 0, steps = 8\n",
      "05:03:38 [INFO] train episode 3456: winner = -1, steps = 7\n",
      "05:03:41 [INFO] train episode 3457: winner = 1, steps = 4\n",
      "05:03:43 [INFO] train episode 3458: winner = 1, steps = 6\n",
      "05:03:43 [INFO] train episode 3459: winner = 1, steps = 6\n",
      "05:03:48 [INFO] train episode 3460: winner = 1, steps = 8\n",
      "05:03:53 [INFO] train episode 3461: winner = 1, steps = 6\n",
      "05:03:54 [INFO] train episode 3462: winner = -1, steps = 5\n",
      "05:03:57 [INFO] train episode 3463: winner = 1, steps = 6\n",
      "05:03:58 [INFO] train episode 3464: winner = 1, steps = 8\n",
      "05:03:59 [INFO] train episode 3465: winner = -1, steps = 5\n",
      "05:04:03 [INFO] train episode 3466: winner = 0, steps = 8\n",
      "05:04:05 [INFO] train episode 3467: winner = 0, steps = 8\n",
      "05:04:06 [INFO] train episode 3468: winner = 1, steps = 4\n",
      "05:04:07 [INFO] train episode 3469: winner = -1, steps = 5\n",
      "05:04:07 [INFO] train episode 3470: winner = 1, steps = 6\n",
      "05:04:07 [INFO] train episode 3471: winner = 1, steps = 4\n",
      "05:04:07 [INFO] train episode 3472: winner = 1, steps = 6\n",
      "05:04:08 [INFO] train episode 3473: winner = 1, steps = 4\n",
      "05:04:12 [INFO] train episode 3474: winner = 0, steps = 8\n",
      "05:04:13 [INFO] train episode 3475: winner = 0, steps = 8\n",
      "05:04:14 [INFO] train episode 3476: winner = 1, steps = 4\n",
      "05:04:15 [INFO] train episode 3477: winner = 1, steps = 6\n",
      "05:04:15 [INFO] train episode 3478: winner = -1, steps = 5\n",
      "05:04:17 [INFO] train episode 3479: winner = 0, steps = 8\n",
      "05:04:17 [INFO] train episode 3480: winner = 1, steps = 6\n",
      "05:04:18 [INFO] train episode 3481: winner = 0, steps = 8\n",
      "05:04:19 [INFO] train episode 3482: winner = 1, steps = 4\n",
      "05:04:21 [INFO] train episode 3483: winner = 1, steps = 6\n",
      "05:04:21 [INFO] train episode 3484: winner = 1, steps = 6\n",
      "05:04:25 [INFO] train episode 3485: winner = -1, steps = 7\n",
      "05:04:25 [INFO] train episode 3486: winner = 1, steps = 4\n",
      "05:04:29 [INFO] train episode 3487: winner = 1, steps = 6\n",
      "05:04:29 [INFO] train episode 3488: winner = 1, steps = 6\n",
      "05:04:29 [INFO] train episode 3489: winner = 1, steps = 6\n",
      "05:04:31 [INFO] train episode 3490: winner = 1, steps = 6\n",
      "05:04:33 [INFO] train episode 3491: winner = 1, steps = 6\n",
      "05:04:35 [INFO] train episode 3492: winner = 1, steps = 6\n",
      "05:04:35 [INFO] train episode 3493: winner = 1, steps = 4\n",
      "05:04:35 [INFO] train episode 3494: winner = 1, steps = 4\n",
      "05:04:36 [INFO] train episode 3495: winner = 0, steps = 8\n",
      "05:04:37 [INFO] train episode 3496: winner = 1, steps = 6\n",
      "05:04:37 [INFO] train episode 3497: winner = 1, steps = 6\n",
      "05:04:40 [INFO] train episode 3498: winner = 1, steps = 6\n",
      "05:04:43 [INFO] train episode 3499: winner = 1, steps = 6\n",
      "05:04:44 [INFO] train episode 3500: winner = 0, steps = 8\n",
      "05:04:44 [INFO] test episode 3500:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:05:06 [INFO] step 0：player 1, action (1, 0)\n",
      "+++\n",
      "o++\n",
      "+++\n",
      "05:05:23 [INFO] step 1：player -1, action (1, 1)\n",
      "+++\n",
      "ox+\n",
      "+++\n",
      "05:05:38 [INFO] step 2：player 1, action (0, 1)\n",
      "+o+\n",
      "ox+\n",
      "+++\n",
      "05:05:44 [INFO] step 3：player -1, action (2, 0)\n",
      "+o+\n",
      "ox+\n",
      "x++\n",
      "05:05:44 [INFO] step 4：player 1, action (0, 2)\n",
      "+oo\n",
      "ox+\n",
      "x++\n",
      "05:05:45 [INFO] step 5：player -1, action (0, 0)\n",
      "xoo\n",
      "ox+\n",
      "x++\n",
      "05:05:45 [INFO] step 6：player 1, action (2, 2)\n",
      "xoo\n",
      "ox+\n",
      "x+o\n",
      "05:05:45 [INFO] step 7：player -1, action (1, 2)\n",
      "xoo\n",
      "oxx\n",
      "x+o\n",
      "05:05:45 [INFO] step 8：player 1, action (2, 1)\n",
      "xoo\n",
      "oxx\n",
      "xoo\n",
      "05:05:45 [INFO] test episode 3500: winner = 0, steps = 8\n",
      "05:06:17 [INFO] train episode 3501: winner = 0, steps = 8\n",
      "05:06:25 [INFO] train episode 3502: winner = 1, steps = 6\n",
      "05:06:52 [INFO] train episode 3503: winner = 1, steps = 6\n",
      "05:07:06 [INFO] train episode 3504: winner = 1, steps = 8\n",
      "05:07:32 [INFO] train episode 3505: winner = -1, steps = 7\n",
      "05:07:36 [INFO] train episode 3506: winner = 1, steps = 4\n",
      "05:08:08 [INFO] train episode 3507: winner = 0, steps = 8\n",
      "05:08:18 [INFO] train episode 3508: winner = 1, steps = 4\n",
      "05:08:21 [INFO] train episode 3509: winner = 0, steps = 8\n",
      "05:08:32 [INFO] train episode 3510: winner = 0, steps = 8\n",
      "05:08:59 [INFO] train episode 3511: winner = 1, steps = 6\n",
      "05:09:19 [INFO] train episode 3512: winner = 1, steps = 4\n",
      "05:09:24 [INFO] train episode 3513: winner = 1, steps = 6\n",
      "05:09:35 [INFO] train episode 3514: winner = 1, steps = 4\n",
      "05:10:02 [INFO] train episode 3515: winner = -1, steps = 7\n",
      "05:10:11 [INFO] train episode 3516: winner = 0, steps = 8\n",
      "05:10:21 [INFO] train episode 3517: winner = 1, steps = 4\n",
      "05:10:23 [INFO] train episode 3518: winner = 1, steps = 4\n",
      "05:10:27 [INFO] train episode 3519: winner = 1, steps = 4\n",
      "05:10:33 [INFO] train episode 3520: winner = 0, steps = 8\n",
      "05:10:36 [INFO] train episode 3521: winner = 1, steps = 4\n",
      "05:10:41 [INFO] train episode 3522: winner = 1, steps = 6\n",
      "05:10:50 [INFO] train episode 3523: winner = 0, steps = 8\n",
      "05:11:04 [INFO] train episode 3524: winner = 1, steps = 6\n",
      "05:11:08 [INFO] train episode 3525: winner = 1, steps = 6\n",
      "05:11:10 [INFO] train episode 3526: winner = 0, steps = 8\n",
      "05:11:12 [INFO] train episode 3527: winner = 0, steps = 8\n",
      "05:11:29 [INFO] train episode 3528: winner = 1, steps = 4\n",
      "05:11:29 [INFO] train episode 3529: winner = 1, steps = 4\n",
      "05:11:31 [INFO] train episode 3530: winner = 0, steps = 8\n",
      "05:11:40 [INFO] train episode 3531: winner = -1, steps = 7\n",
      "05:11:41 [INFO] train episode 3532: winner = 1, steps = 4\n",
      "05:11:51 [INFO] train episode 3533: winner = 1, steps = 6\n",
      "05:11:54 [INFO] train episode 3534: winner = 1, steps = 6\n",
      "05:11:57 [INFO] train episode 3535: winner = 1, steps = 8\n",
      "05:11:58 [INFO] train episode 3536: winner = -1, steps = 7\n",
      "05:11:58 [INFO] train episode 3537: winner = -1, steps = 7\n",
      "05:12:02 [INFO] train episode 3538: winner = 1, steps = 6\n",
      "05:12:09 [INFO] train episode 3539: winner = -1, steps = 5\n",
      "05:12:15 [INFO] train episode 3540: winner = 0, steps = 8\n",
      "05:12:23 [INFO] train episode 3541: winner = -1, steps = 5\n",
      "05:12:29 [INFO] train episode 3542: winner = 1, steps = 4\n",
      "05:12:31 [INFO] train episode 3543: winner = -1, steps = 7\n",
      "05:12:31 [INFO] train episode 3544: winner = -1, steps = 5\n",
      "05:12:35 [INFO] train episode 3545: winner = 1, steps = 4\n",
      "05:12:38 [INFO] train episode 3546: winner = 0, steps = 8\n",
      "05:12:38 [INFO] train episode 3547: winner = 1, steps = 6\n",
      "05:12:39 [INFO] train episode 3548: winner = 1, steps = 6\n",
      "05:12:46 [INFO] train episode 3549: winner = -1, steps = 7\n",
      "05:12:47 [INFO] train episode 3550: winner = 1, steps = 4\n",
      "05:12:54 [INFO] train episode 3551: winner = 1, steps = 8\n",
      "05:13:01 [INFO] train episode 3552: winner = 0, steps = 8\n",
      "05:13:04 [INFO] train episode 3553: winner = 1, steps = 6\n",
      "05:13:05 [INFO] train episode 3554: winner = -1, steps = 5\n",
      "05:13:10 [INFO] train episode 3555: winner = 0, steps = 8\n",
      "05:13:13 [INFO] train episode 3556: winner = 0, steps = 8\n",
      "05:13:23 [INFO] train episode 3557: winner = 0, steps = 8\n",
      "05:13:27 [INFO] train episode 3558: winner = 0, steps = 8\n",
      "05:13:28 [INFO] train episode 3559: winner = 1, steps = 4\n",
      "05:13:29 [INFO] train episode 3560: winner = 1, steps = 4\n",
      "05:13:30 [INFO] train episode 3561: winner = 1, steps = 6\n",
      "05:13:36 [INFO] train episode 3562: winner = 1, steps = 6\n",
      "05:13:39 [INFO] train episode 3563: winner = 1, steps = 6\n",
      "05:13:41 [INFO] train episode 3564: winner = 0, steps = 8\n",
      "05:13:42 [INFO] train episode 3565: winner = 1, steps = 4\n",
      "05:13:44 [INFO] train episode 3566: winner = 1, steps = 4\n",
      "05:13:47 [INFO] train episode 3567: winner = 1, steps = 6\n",
      "05:13:49 [INFO] train episode 3568: winner = 0, steps = 8\n",
      "05:13:50 [INFO] train episode 3569: winner = 1, steps = 4\n",
      "05:13:53 [INFO] train episode 3570: winner = 0, steps = 8\n",
      "05:13:54 [INFO] train episode 3571: winner = 1, steps = 4\n",
      "05:13:54 [INFO] train episode 3572: winner = 1, steps = 6\n",
      "05:13:56 [INFO] train episode 3573: winner = 1, steps = 6\n",
      "05:13:56 [INFO] train episode 3574: winner = 1, steps = 4\n",
      "05:13:58 [INFO] train episode 3575: winner = 1, steps = 4\n",
      "05:14:03 [INFO] train episode 3576: winner = 1, steps = 8\n",
      "05:14:03 [INFO] train episode 3577: winner = 1, steps = 6\n",
      "05:14:04 [INFO] train episode 3578: winner = 1, steps = 6\n",
      "05:14:05 [INFO] train episode 3579: winner = 1, steps = 4\n",
      "05:14:07 [INFO] train episode 3580: winner = 0, steps = 8\n",
      "05:14:09 [INFO] train episode 3581: winner = -1, steps = 7\n",
      "05:14:10 [INFO] train episode 3582: winner = 1, steps = 6\n",
      "05:14:10 [INFO] train episode 3583: winner = 1, steps = 4\n",
      "05:14:11 [INFO] train episode 3584: winner = 1, steps = 6\n",
      "05:14:13 [INFO] train episode 3585: winner = 1, steps = 6\n",
      "05:14:17 [INFO] train episode 3586: winner = 1, steps = 8\n",
      "05:14:20 [INFO] train episode 3587: winner = -1, steps = 7\n",
      "05:14:20 [INFO] train episode 3588: winner = 1, steps = 4\n",
      "05:14:22 [INFO] train episode 3589: winner = 1, steps = 6\n",
      "05:14:26 [INFO] train episode 3590: winner = 0, steps = 8\n",
      "05:14:27 [INFO] train episode 3591: winner = 0, steps = 8\n",
      "05:14:30 [INFO] train episode 3592: winner = 0, steps = 8\n",
      "05:14:31 [INFO] train episode 3593: winner = 1, steps = 4\n",
      "05:14:31 [INFO] train episode 3594: winner = 1, steps = 4\n",
      "05:14:34 [INFO] train episode 3595: winner = -1, steps = 5\n",
      "05:14:36 [INFO] train episode 3596: winner = 0, steps = 8\n",
      "05:14:36 [INFO] train episode 3597: winner = 1, steps = 6\n",
      "05:14:38 [INFO] train episode 3598: winner = -1, steps = 5\n",
      "05:14:40 [INFO] train episode 3599: winner = 1, steps = 6\n",
      "05:14:41 [INFO] train episode 3600: winner = 1, steps = 4\n",
      "05:14:42 [INFO] train episode 3601: winner = 0, steps = 8\n",
      "05:14:44 [INFO] train episode 3602: winner = 0, steps = 8\n",
      "05:14:45 [INFO] train episode 3603: winner = 0, steps = 8\n",
      "05:14:46 [INFO] train episode 3604: winner = 0, steps = 8\n",
      "05:14:47 [INFO] train episode 3605: winner = 0, steps = 8\n",
      "05:14:47 [INFO] train episode 3606: winner = 1, steps = 6\n",
      "05:14:48 [INFO] train episode 3607: winner = 1, steps = 6\n",
      "05:14:49 [INFO] train episode 3608: winner = 1, steps = 6\n",
      "05:14:49 [INFO] train episode 3609: winner = 1, steps = 6\n",
      "05:14:49 [INFO] train episode 3610: winner = 1, steps = 6\n",
      "05:14:50 [INFO] train episode 3611: winner = 1, steps = 6\n",
      "05:14:50 [INFO] train episode 3612: winner = 0, steps = 8\n",
      "05:14:51 [INFO] train episode 3613: winner = 0, steps = 8\n",
      "05:14:53 [INFO] train episode 3614: winner = 1, steps = 8\n",
      "05:14:55 [INFO] train episode 3615: winner = 0, steps = 8\n",
      "05:14:58 [INFO] train episode 3616: winner = 1, steps = 8\n",
      "05:15:05 [INFO] train episode 3617: winner = 0, steps = 8\n",
      "05:15:06 [INFO] train episode 3618: winner = 1, steps = 6\n",
      "05:15:06 [INFO] train episode 3619: winner = 1, steps = 6\n",
      "05:15:07 [INFO] train episode 3620: winner = 0, steps = 8\n",
      "05:15:11 [INFO] train episode 3621: winner = 0, steps = 8\n",
      "05:15:11 [INFO] train episode 3622: winner = 0, steps = 8\n",
      "05:15:13 [INFO] train episode 3623: winner = 0, steps = 8\n",
      "05:15:13 [INFO] train episode 3624: winner = 1, steps = 4\n",
      "05:15:16 [INFO] train episode 3625: winner = -1, steps = 5\n",
      "05:15:19 [INFO] train episode 3626: winner = 0, steps = 8\n",
      "05:15:19 [INFO] train episode 3627: winner = 1, steps = 6\n",
      "05:15:21 [INFO] train episode 3628: winner = 1, steps = 6\n",
      "05:15:21 [INFO] train episode 3629: winner = 1, steps = 4\n",
      "05:15:23 [INFO] train episode 3630: winner = 0, steps = 8\n",
      "05:15:24 [INFO] train episode 3631: winner = 1, steps = 6\n",
      "05:15:25 [INFO] train episode 3632: winner = 1, steps = 4\n",
      "05:15:26 [INFO] train episode 3633: winner = 1, steps = 4\n",
      "05:15:26 [INFO] train episode 3634: winner = 1, steps = 8\n",
      "05:15:27 [INFO] train episode 3635: winner = 1, steps = 6\n",
      "05:15:28 [INFO] train episode 3636: winner = 1, steps = 4\n",
      "05:15:28 [INFO] train episode 3637: winner = 1, steps = 4\n",
      "05:15:30 [INFO] train episode 3638: winner = 1, steps = 8\n",
      "05:15:31 [INFO] train episode 3639: winner = 0, steps = 8\n",
      "05:15:31 [INFO] test episode 3639:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:15:53 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "05:16:10 [INFO] step 1：player -1, action (0, 0)\n",
      "xo+\n",
      "+++\n",
      "+++\n",
      "05:16:24 [INFO] step 2：player 1, action (2, 1)\n",
      "xo+\n",
      "+++\n",
      "+o+\n",
      "05:16:26 [INFO] step 3：player -1, action (1, 1)\n",
      "xo+\n",
      "+x+\n",
      "+o+\n",
      "05:16:26 [INFO] step 4：player 1, action (2, 0)\n",
      "xo+\n",
      "+x+\n",
      "oo+\n",
      "05:16:26 [INFO] step 5：player -1, action (2, 2)\n",
      "xo+\n",
      "+x+\n",
      "oox\n",
      "05:16:26 [INFO] test episode 3639: winner = -1, steps = 5\n",
      "05:16:59 [INFO] train episode 3640: winner = 1, steps = 6\n",
      "05:17:33 [INFO] train episode 3641: winner = 1, steps = 4\n",
      "05:17:40 [INFO] train episode 3642: winner = -1, steps = 7\n",
      "05:18:19 [INFO] train episode 3643: winner = -1, steps = 5\n",
      "05:18:24 [INFO] train episode 3644: winner = 1, steps = 6\n",
      "05:19:03 [INFO] train episode 3645: winner = 0, steps = 8\n",
      "05:19:32 [INFO] train episode 3646: winner = -1, steps = 5\n",
      "05:19:51 [INFO] train episode 3647: winner = 1, steps = 6\n",
      "05:20:02 [INFO] train episode 3648: winner = -1, steps = 7\n",
      "05:20:21 [INFO] train episode 3649: winner = 1, steps = 4\n",
      "05:20:34 [INFO] train episode 3650: winner = 1, steps = 4\n",
      "05:20:41 [INFO] train episode 3651: winner = 1, steps = 4\n",
      "05:20:49 [INFO] train episode 3652: winner = 0, steps = 8\n",
      "05:21:05 [INFO] train episode 3653: winner = 1, steps = 6\n",
      "05:21:18 [INFO] train episode 3654: winner = 0, steps = 8\n",
      "05:21:34 [INFO] train episode 3655: winner = -1, steps = 5\n",
      "05:21:43 [INFO] train episode 3656: winner = 0, steps = 8\n",
      "05:21:46 [INFO] train episode 3657: winner = 1, steps = 6\n",
      "05:21:46 [INFO] train episode 3658: winner = 1, steps = 4\n",
      "05:21:46 [INFO] train episode 3659: winner = 1, steps = 6\n",
      "05:21:52 [INFO] train episode 3660: winner = 0, steps = 8\n",
      "05:22:00 [INFO] train episode 3661: winner = 0, steps = 8\n",
      "05:22:03 [INFO] train episode 3662: winner = 1, steps = 4\n",
      "05:22:12 [INFO] train episode 3663: winner = 0, steps = 8\n",
      "05:22:13 [INFO] train episode 3664: winner = 0, steps = 8\n",
      "05:22:16 [INFO] train episode 3665: winner = 0, steps = 8\n",
      "05:22:26 [INFO] train episode 3666: winner = 0, steps = 8\n",
      "05:22:34 [INFO] train episode 3667: winner = 1, steps = 6\n",
      "05:22:39 [INFO] train episode 3668: winner = 1, steps = 4\n",
      "05:22:39 [INFO] train episode 3669: winner = 0, steps = 8\n",
      "05:22:45 [INFO] train episode 3670: winner = 1, steps = 4\n",
      "05:22:46 [INFO] train episode 3671: winner = 0, steps = 8\n",
      "05:22:51 [INFO] train episode 3672: winner = 1, steps = 6\n",
      "05:22:57 [INFO] train episode 3673: winner = 1, steps = 6\n",
      "05:22:58 [INFO] train episode 3674: winner = 1, steps = 4\n",
      "05:22:59 [INFO] train episode 3675: winner = 1, steps = 6\n",
      "05:23:01 [INFO] train episode 3676: winner = 1, steps = 4\n",
      "05:23:09 [INFO] train episode 3677: winner = 1, steps = 8\n",
      "05:23:11 [INFO] train episode 3678: winner = -1, steps = 5\n",
      "05:23:13 [INFO] train episode 3679: winner = 0, steps = 8\n",
      "05:23:14 [INFO] train episode 3680: winner = 1, steps = 6\n",
      "05:23:17 [INFO] train episode 3681: winner = 1, steps = 4\n",
      "05:23:17 [INFO] train episode 3682: winner = 0, steps = 8\n",
      "05:23:28 [INFO] train episode 3683: winner = 0, steps = 8\n",
      "05:23:31 [INFO] train episode 3684: winner = 1, steps = 6\n",
      "05:23:33 [INFO] train episode 3685: winner = 1, steps = 4\n",
      "05:23:35 [INFO] train episode 3686: winner = 1, steps = 4\n",
      "05:23:38 [INFO] train episode 3687: winner = 1, steps = 4\n",
      "05:23:38 [INFO] train episode 3688: winner = 1, steps = 4\n",
      "05:23:47 [INFO] train episode 3689: winner = 0, steps = 8\n",
      "05:23:52 [INFO] train episode 3690: winner = 0, steps = 8\n",
      "05:23:55 [INFO] train episode 3691: winner = 1, steps = 6\n",
      "05:24:01 [INFO] train episode 3692: winner = 1, steps = 6\n",
      "05:24:02 [INFO] train episode 3693: winner = 1, steps = 4\n",
      "05:24:04 [INFO] train episode 3694: winner = -1, steps = 7\n",
      "05:24:04 [INFO] train episode 3695: winner = 1, steps = 6\n",
      "05:24:09 [INFO] train episode 3696: winner = 1, steps = 6\n",
      "05:24:10 [INFO] train episode 3697: winner = -1, steps = 7\n",
      "05:24:11 [INFO] train episode 3698: winner = 0, steps = 8\n",
      "05:24:15 [INFO] train episode 3699: winner = 0, steps = 8\n",
      "05:24:17 [INFO] train episode 3700: winner = 1, steps = 6\n",
      "05:24:17 [INFO] train episode 3701: winner = 1, steps = 6\n",
      "05:24:17 [INFO] train episode 3702: winner = 1, steps = 4\n",
      "05:24:17 [INFO] train episode 3703: winner = 1, steps = 4\n",
      "05:24:17 [INFO] train episode 3704: winner = 1, steps = 4\n",
      "05:24:20 [INFO] train episode 3705: winner = 1, steps = 6\n",
      "05:24:20 [INFO] train episode 3706: winner = 1, steps = 6\n",
      "05:24:22 [INFO] train episode 3707: winner = 0, steps = 8\n",
      "05:24:22 [INFO] train episode 3708: winner = 1, steps = 6\n",
      "05:24:25 [INFO] train episode 3709: winner = 0, steps = 8\n",
      "05:24:26 [INFO] train episode 3710: winner = 1, steps = 4\n",
      "05:24:27 [INFO] train episode 3711: winner = 1, steps = 6\n",
      "05:24:28 [INFO] train episode 3712: winner = 1, steps = 6\n",
      "05:24:29 [INFO] train episode 3713: winner = -1, steps = 7\n",
      "05:24:29 [INFO] train episode 3714: winner = 0, steps = 8\n",
      "05:24:29 [INFO] train episode 3715: winner = 0, steps = 8\n",
      "05:24:33 [INFO] train episode 3716: winner = 0, steps = 8\n",
      "05:24:37 [INFO] train episode 3717: winner = -1, steps = 7\n",
      "05:24:41 [INFO] train episode 3718: winner = 1, steps = 6\n",
      "05:24:42 [INFO] train episode 3719: winner = 1, steps = 4\n",
      "05:24:43 [INFO] train episode 3720: winner = 1, steps = 6\n",
      "05:24:49 [INFO] train episode 3721: winner = 0, steps = 8\n",
      "05:24:49 [INFO] train episode 3722: winner = 1, steps = 4\n",
      "05:24:50 [INFO] train episode 3723: winner = 0, steps = 8\n",
      "05:24:51 [INFO] train episode 3724: winner = 1, steps = 6\n",
      "05:24:53 [INFO] train episode 3725: winner = 1, steps = 4\n",
      "05:24:56 [INFO] train episode 3726: winner = 1, steps = 4\n",
      "05:24:59 [INFO] train episode 3727: winner = 1, steps = 6\n",
      "05:25:02 [INFO] train episode 3728: winner = 1, steps = 6\n",
      "05:25:05 [INFO] train episode 3729: winner = 0, steps = 8\n",
      "05:25:06 [INFO] train episode 3730: winner = 0, steps = 8\n",
      "05:25:06 [INFO] train episode 3731: winner = 1, steps = 6\n",
      "05:25:06 [INFO] train episode 3732: winner = 1, steps = 4\n",
      "05:25:07 [INFO] train episode 3733: winner = 1, steps = 6\n",
      "05:25:12 [INFO] train episode 3734: winner = 0, steps = 8\n",
      "05:25:14 [INFO] train episode 3735: winner = 1, steps = 6\n",
      "05:25:14 [INFO] train episode 3736: winner = 1, steps = 4\n",
      "05:25:16 [INFO] train episode 3737: winner = 1, steps = 6\n",
      "05:25:18 [INFO] train episode 3738: winner = 1, steps = 6\n",
      "05:25:18 [INFO] train episode 3739: winner = 1, steps = 4\n",
      "05:25:19 [INFO] train episode 3740: winner = 1, steps = 6\n",
      "05:25:19 [INFO] train episode 3741: winner = 1, steps = 6\n",
      "05:25:19 [INFO] train episode 3742: winner = 1, steps = 4\n",
      "05:25:22 [INFO] train episode 3743: winner = 1, steps = 6\n",
      "05:25:26 [INFO] train episode 3744: winner = 1, steps = 4\n",
      "05:25:26 [INFO] train episode 3745: winner = 1, steps = 4\n",
      "05:25:26 [INFO] train episode 3746: winner = 1, steps = 6\n",
      "05:25:28 [INFO] train episode 3747: winner = 1, steps = 8\n",
      "05:25:31 [INFO] train episode 3748: winner = 0, steps = 8\n",
      "05:25:32 [INFO] train episode 3749: winner = 1, steps = 6\n",
      "05:25:37 [INFO] train episode 3750: winner = 0, steps = 8\n",
      "05:25:41 [INFO] train episode 3751: winner = -1, steps = 5\n",
      "05:25:43 [INFO] train episode 3752: winner = 1, steps = 6\n",
      "05:25:46 [INFO] train episode 3753: winner = 0, steps = 8\n",
      "05:25:46 [INFO] train episode 3754: winner = 1, steps = 4\n",
      "05:25:48 [INFO] train episode 3755: winner = 0, steps = 8\n",
      "05:25:48 [INFO] train episode 3756: winner = 0, steps = 8\n",
      "05:25:49 [INFO] train episode 3757: winner = 1, steps = 6\n",
      "05:25:49 [INFO] train episode 3758: winner = 1, steps = 4\n",
      "05:25:50 [INFO] train episode 3759: winner = 0, steps = 8\n",
      "05:25:50 [INFO] train episode 3760: winner = 1, steps = 6\n",
      "05:25:51 [INFO] train episode 3761: winner = -1, steps = 5\n",
      "05:25:52 [INFO] train episode 3762: winner = 0, steps = 8\n",
      "05:25:53 [INFO] train episode 3763: winner = 1, steps = 6\n",
      "05:25:54 [INFO] train episode 3764: winner = 1, steps = 6\n",
      "05:25:54 [INFO] train episode 3765: winner = 0, steps = 8\n",
      "05:25:56 [INFO] train episode 3766: winner = 0, steps = 8\n",
      "05:25:57 [INFO] train episode 3767: winner = 1, steps = 4\n",
      "05:25:58 [INFO] train episode 3768: winner = -1, steps = 5\n",
      "05:25:59 [INFO] train episode 3769: winner = 1, steps = 6\n",
      "05:25:59 [INFO] train episode 3770: winner = 0, steps = 8\n",
      "05:26:00 [INFO] train episode 3771: winner = -1, steps = 5\n",
      "05:26:01 [INFO] train episode 3772: winner = 1, steps = 6\n",
      "05:26:02 [INFO] train episode 3773: winner = 0, steps = 8\n",
      "05:26:03 [INFO] train episode 3774: winner = -1, steps = 5\n",
      "05:26:03 [INFO] train episode 3775: winner = 1, steps = 6\n",
      "05:26:06 [INFO] train episode 3776: winner = 1, steps = 8\n",
      "05:26:08 [INFO] train episode 3777: winner = 0, steps = 8\n",
      "05:26:08 [INFO] train episode 3778: winner = 1, steps = 6\n",
      "05:26:11 [INFO] train episode 3779: winner = 0, steps = 8\n",
      "05:26:11 [INFO] test episode 3779:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:26:33 [INFO] step 0：player 1, action (2, 1)\n",
      "+++\n",
      "+++\n",
      "+o+\n",
      "05:26:52 [INFO] step 1：player -1, action (1, 0)\n",
      "+++\n",
      "x++\n",
      "+o+\n",
      "05:27:09 [INFO] step 2：player 1, action (2, 2)\n",
      "+++\n",
      "x++\n",
      "+oo\n",
      "05:27:12 [INFO] step 3：player -1, action (0, 0)\n",
      "x++\n",
      "x++\n",
      "+oo\n",
      "05:27:13 [INFO] step 4：player 1, action (2, 0)\n",
      "x++\n",
      "x++\n",
      "ooo\n",
      "05:27:13 [INFO] test episode 3779: winner = 1, steps = 4\n",
      "05:27:45 [INFO] train episode 3780: winner = 1, steps = 4\n",
      "05:28:00 [INFO] train episode 3781: winner = 0, steps = 8\n",
      "05:28:32 [INFO] train episode 3782: winner = 1, steps = 6\n",
      "05:28:50 [INFO] train episode 3783: winner = 1, steps = 4\n",
      "05:29:22 [INFO] train episode 3784: winner = -1, steps = 5\n",
      "05:29:52 [INFO] train episode 3785: winner = -1, steps = 7\n",
      "05:30:29 [INFO] train episode 3786: winner = -1, steps = 5\n",
      "05:30:51 [INFO] train episode 3787: winner = 0, steps = 8\n",
      "05:30:54 [INFO] train episode 3788: winner = 1, steps = 6\n",
      "05:30:57 [INFO] train episode 3789: winner = -1, steps = 7\n",
      "05:31:03 [INFO] train episode 3790: winner = 1, steps = 6\n",
      "05:31:09 [INFO] train episode 3791: winner = 1, steps = 6\n",
      "05:31:13 [INFO] train episode 3792: winner = 1, steps = 4\n",
      "05:31:19 [INFO] train episode 3793: winner = 1, steps = 4\n",
      "05:31:42 [INFO] train episode 3794: winner = 0, steps = 8\n",
      "05:31:47 [INFO] train episode 3795: winner = 0, steps = 8\n",
      "05:31:48 [INFO] train episode 3796: winner = -1, steps = 7\n",
      "05:31:58 [INFO] train episode 3797: winner = -1, steps = 5\n",
      "05:31:59 [INFO] train episode 3798: winner = 1, steps = 4\n",
      "05:32:05 [INFO] train episode 3799: winner = 1, steps = 6\n",
      "05:32:15 [INFO] train episode 3800: winner = 1, steps = 6\n",
      "05:32:33 [INFO] train episode 3801: winner = 1, steps = 8\n",
      "05:32:34 [INFO] train episode 3802: winner = 1, steps = 4\n",
      "05:32:40 [INFO] train episode 3803: winner = 0, steps = 8\n",
      "05:32:43 [INFO] train episode 3804: winner = 1, steps = 4\n",
      "05:32:50 [INFO] train episode 3805: winner = 1, steps = 6\n",
      "05:32:51 [INFO] train episode 3806: winner = 1, steps = 4\n",
      "05:32:53 [INFO] train episode 3807: winner = 1, steps = 4\n",
      "05:32:55 [INFO] train episode 3808: winner = -1, steps = 5\n",
      "05:33:06 [INFO] train episode 3809: winner = 0, steps = 8\n",
      "05:33:08 [INFO] train episode 3810: winner = 1, steps = 6\n",
      "05:33:09 [INFO] train episode 3811: winner = 1, steps = 4\n",
      "05:33:16 [INFO] train episode 3812: winner = -1, steps = 7\n",
      "05:33:20 [INFO] train episode 3813: winner = 1, steps = 6\n",
      "05:33:23 [INFO] train episode 3814: winner = 1, steps = 4\n",
      "05:33:24 [INFO] train episode 3815: winner = 0, steps = 8\n",
      "05:33:24 [INFO] train episode 3816: winner = 1, steps = 6\n",
      "05:33:25 [INFO] train episode 3817: winner = 1, steps = 4\n",
      "05:33:29 [INFO] train episode 3818: winner = 0, steps = 8\n",
      "05:33:30 [INFO] train episode 3819: winner = 1, steps = 4\n",
      "05:33:31 [INFO] train episode 3820: winner = 1, steps = 4\n",
      "05:33:37 [INFO] train episode 3821: winner = -1, steps = 5\n",
      "05:33:39 [INFO] train episode 3822: winner = -1, steps = 5\n",
      "05:33:41 [INFO] train episode 3823: winner = 1, steps = 4\n",
      "05:33:43 [INFO] train episode 3824: winner = 1, steps = 6\n",
      "05:33:48 [INFO] train episode 3825: winner = -1, steps = 5\n",
      "05:33:52 [INFO] train episode 3826: winner = 1, steps = 6\n",
      "05:33:54 [INFO] train episode 3827: winner = 1, steps = 6\n",
      "05:34:00 [INFO] train episode 3828: winner = -1, steps = 7\n",
      "05:34:02 [INFO] train episode 3829: winner = 1, steps = 8\n",
      "05:34:15 [INFO] train episode 3830: winner = 1, steps = 6\n",
      "05:34:17 [INFO] train episode 3831: winner = 0, steps = 8\n",
      "05:34:24 [INFO] train episode 3832: winner = -1, steps = 7\n",
      "05:34:31 [INFO] train episode 3833: winner = 0, steps = 8\n",
      "05:34:31 [INFO] train episode 3834: winner = 1, steps = 4\n",
      "05:34:32 [INFO] train episode 3835: winner = 1, steps = 4\n",
      "05:34:33 [INFO] train episode 3836: winner = 1, steps = 6\n",
      "05:34:33 [INFO] train episode 3837: winner = 1, steps = 4\n",
      "05:34:37 [INFO] train episode 3838: winner = 1, steps = 8\n",
      "05:34:37 [INFO] train episode 3839: winner = -1, steps = 7\n",
      "05:34:39 [INFO] train episode 3840: winner = 0, steps = 8\n",
      "05:34:40 [INFO] train episode 3841: winner = 1, steps = 6\n",
      "05:34:40 [INFO] train episode 3842: winner = 1, steps = 4\n",
      "05:34:44 [INFO] train episode 3843: winner = 1, steps = 6\n",
      "05:34:45 [INFO] train episode 3844: winner = 0, steps = 8\n",
      "05:34:47 [INFO] train episode 3845: winner = 0, steps = 8\n",
      "05:34:51 [INFO] train episode 3846: winner = 1, steps = 6\n",
      "05:34:55 [INFO] train episode 3847: winner = 1, steps = 8\n",
      "05:34:56 [INFO] train episode 3848: winner = 1, steps = 6\n",
      "05:34:57 [INFO] train episode 3849: winner = 1, steps = 6\n",
      "05:34:57 [INFO] train episode 3850: winner = -1, steps = 7\n",
      "05:34:59 [INFO] train episode 3851: winner = 0, steps = 8\n",
      "05:35:04 [INFO] train episode 3852: winner = -1, steps = 5\n",
      "05:35:04 [INFO] train episode 3853: winner = 1, steps = 4\n",
      "05:35:04 [INFO] train episode 3854: winner = 1, steps = 4\n",
      "05:35:12 [INFO] train episode 3855: winner = 1, steps = 6\n",
      "05:35:13 [INFO] train episode 3856: winner = -1, steps = 5\n",
      "05:35:15 [INFO] train episode 3857: winner = 1, steps = 4\n",
      "05:35:17 [INFO] train episode 3858: winner = 0, steps = 8\n",
      "05:35:17 [INFO] train episode 3859: winner = 0, steps = 8\n",
      "05:35:21 [INFO] train episode 3860: winner = 1, steps = 6\n",
      "05:35:22 [INFO] train episode 3861: winner = -1, steps = 7\n",
      "05:35:22 [INFO] train episode 3862: winner = 1, steps = 6\n",
      "05:35:22 [INFO] train episode 3863: winner = 1, steps = 4\n",
      "05:35:23 [INFO] train episode 3864: winner = -1, steps = 7\n",
      "05:35:23 [INFO] train episode 3865: winner = 1, steps = 4\n",
      "05:35:25 [INFO] train episode 3866: winner = 1, steps = 4\n",
      "05:35:27 [INFO] train episode 3867: winner = 1, steps = 4\n",
      "05:35:27 [INFO] train episode 3868: winner = 1, steps = 4\n",
      "05:35:28 [INFO] train episode 3869: winner = 1, steps = 4\n",
      "05:35:28 [INFO] train episode 3870: winner = 1, steps = 6\n",
      "05:35:30 [INFO] train episode 3871: winner = 1, steps = 4\n",
      "05:35:36 [INFO] train episode 3872: winner = 1, steps = 6\n",
      "05:35:39 [INFO] train episode 3873: winner = 1, steps = 8\n",
      "05:35:42 [INFO] train episode 3874: winner = 0, steps = 8\n",
      "05:35:46 [INFO] train episode 3875: winner = -1, steps = 7\n",
      "05:35:46 [INFO] train episode 3876: winner = 1, steps = 4\n",
      "05:35:46 [INFO] train episode 3877: winner = 1, steps = 6\n",
      "05:35:46 [INFO] train episode 3878: winner = 1, steps = 6\n",
      "05:35:47 [INFO] train episode 3879: winner = -1, steps = 7\n",
      "05:35:48 [INFO] train episode 3880: winner = -1, steps = 5\n",
      "05:35:49 [INFO] train episode 3881: winner = -1, steps = 5\n",
      "05:35:49 [INFO] train episode 3882: winner = 1, steps = 4\n",
      "05:35:49 [INFO] train episode 3883: winner = 1, steps = 4\n",
      "05:35:49 [INFO] train episode 3884: winner = 0, steps = 8\n",
      "05:35:53 [INFO] train episode 3885: winner = 1, steps = 6\n",
      "05:35:53 [INFO] train episode 3886: winner = 0, steps = 8\n",
      "05:35:54 [INFO] train episode 3887: winner = 1, steps = 4\n",
      "05:35:57 [INFO] train episode 3888: winner = 0, steps = 8\n",
      "05:35:59 [INFO] train episode 3889: winner = 1, steps = 4\n",
      "05:35:59 [INFO] train episode 3890: winner = 0, steps = 8\n",
      "05:36:01 [INFO] train episode 3891: winner = 1, steps = 6\n",
      "05:36:02 [INFO] train episode 3892: winner = 1, steps = 4\n",
      "05:36:02 [INFO] train episode 3893: winner = 1, steps = 4\n",
      "05:36:02 [INFO] train episode 3894: winner = 1, steps = 4\n",
      "05:36:03 [INFO] train episode 3895: winner = 1, steps = 4\n",
      "05:36:03 [INFO] train episode 3896: winner = -1, steps = 7\n",
      "05:36:04 [INFO] train episode 3897: winner = 1, steps = 6\n",
      "05:36:04 [INFO] train episode 3898: winner = 1, steps = 4\n",
      "05:36:05 [INFO] train episode 3899: winner = 1, steps = 6\n",
      "05:36:08 [INFO] train episode 3900: winner = 0, steps = 8\n",
      "05:36:08 [INFO] train episode 3901: winner = 0, steps = 8\n",
      "05:36:08 [INFO] train episode 3902: winner = 1, steps = 6\n",
      "05:36:10 [INFO] train episode 3903: winner = 1, steps = 4\n",
      "05:36:11 [INFO] train episode 3904: winner = 0, steps = 8\n",
      "05:36:13 [INFO] train episode 3905: winner = 0, steps = 8\n",
      "05:36:15 [INFO] train episode 3906: winner = 1, steps = 6\n",
      "05:36:17 [INFO] train episode 3907: winner = 1, steps = 4\n",
      "05:36:17 [INFO] train episode 3908: winner = 0, steps = 8\n",
      "05:36:18 [INFO] train episode 3909: winner = 1, steps = 4\n",
      "05:36:18 [INFO] train episode 3910: winner = 1, steps = 4\n",
      "05:36:18 [INFO] train episode 3911: winner = 1, steps = 4\n",
      "05:36:23 [INFO] train episode 3912: winner = 1, steps = 8\n",
      "05:36:23 [INFO] train episode 3913: winner = 1, steps = 4\n",
      "05:36:26 [INFO] train episode 3914: winner = -1, steps = 7\n",
      "05:36:28 [INFO] train episode 3915: winner = 0, steps = 8\n",
      "05:36:29 [INFO] train episode 3916: winner = 1, steps = 6\n",
      "05:36:32 [INFO] train episode 3917: winner = 0, steps = 8\n",
      "05:36:33 [INFO] train episode 3918: winner = 1, steps = 4\n",
      "05:36:34 [INFO] train episode 3919: winner = 1, steps = 6\n",
      "05:36:35 [INFO] train episode 3920: winner = 1, steps = 6\n",
      "05:36:35 [INFO] train episode 3921: winner = 1, steps = 6\n",
      "05:36:36 [INFO] train episode 3922: winner = 1, steps = 6\n",
      "05:36:36 [INFO] train episode 3923: winner = -1, steps = 5\n",
      "05:36:37 [INFO] train episode 3924: winner = -1, steps = 7\n",
      "05:36:42 [INFO] train episode 3925: winner = 1, steps = 6\n",
      "05:36:42 [INFO] test episode 3925:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:37:03 [INFO] step 0：player 1, action (0, 1)\n",
      "+o+\n",
      "+++\n",
      "+++\n",
      "05:37:21 [INFO] step 1：player -1, action (1, 2)\n",
      "+o+\n",
      "++x\n",
      "+++\n",
      "05:37:31 [INFO] step 2：player 1, action (1, 0)\n",
      "+o+\n",
      "o+x\n",
      "+++\n",
      "05:37:43 [INFO] step 3：player -1, action (0, 2)\n",
      "+ox\n",
      "o+x\n",
      "+++\n",
      "05:37:44 [INFO] step 4：player 1, action (2, 2)\n",
      "+ox\n",
      "o+x\n",
      "++o\n",
      "05:37:44 [INFO] step 5：player -1, action (2, 1)\n",
      "+ox\n",
      "o+x\n",
      "+xo\n",
      "05:37:44 [INFO] step 6：player 1, action (0, 0)\n",
      "oox\n",
      "o+x\n",
      "+xo\n",
      "05:37:44 [INFO] step 7：player -1, action (2, 0)\n",
      "oox\n",
      "o+x\n",
      "xxo\n",
      "05:37:44 [INFO] step 8：player 1, action (1, 1)\n",
      "oox\n",
      "oox\n",
      "xxo\n",
      "05:37:44 [INFO] test episode 3925: winner = 1, steps = 8\n",
      "05:38:17 [INFO] train episode 3926: winner = 0, steps = 8\n",
      "05:38:41 [INFO] train episode 3927: winner = 1, steps = 6\n",
      "05:39:16 [INFO] train episode 3928: winner = -1, steps = 5\n",
      "05:39:23 [INFO] train episode 3929: winner = 1, steps = 4\n",
      "05:39:41 [INFO] train episode 3930: winner = 0, steps = 8\n",
      "05:40:11 [INFO] train episode 3931: winner = 1, steps = 6\n",
      "05:40:20 [INFO] train episode 3932: winner = -1, steps = 5\n",
      "05:40:50 [INFO] train episode 3933: winner = 1, steps = 6\n",
      "05:41:06 [INFO] train episode 3934: winner = 1, steps = 4\n",
      "05:41:31 [INFO] train episode 3935: winner = 0, steps = 8\n",
      "05:41:35 [INFO] train episode 3936: winner = 1, steps = 6\n",
      "05:41:41 [INFO] train episode 3937: winner = 1, steps = 4\n",
      "05:41:47 [INFO] train episode 3938: winner = 1, steps = 8\n",
      "05:41:56 [INFO] train episode 3939: winner = 0, steps = 8\n",
      "05:42:01 [INFO] train episode 3940: winner = 1, steps = 4\n",
      "05:42:09 [INFO] train episode 3941: winner = 0, steps = 8\n",
      "05:42:27 [INFO] train episode 3942: winner = 1, steps = 8\n",
      "05:42:29 [INFO] train episode 3943: winner = 1, steps = 6\n",
      "05:42:36 [INFO] train episode 3944: winner = 0, steps = 8\n",
      "05:42:39 [INFO] train episode 3945: winner = 1, steps = 4\n",
      "05:42:40 [INFO] train episode 3946: winner = 1, steps = 6\n",
      "05:42:48 [INFO] train episode 3947: winner = 0, steps = 8\n",
      "05:42:55 [INFO] train episode 3948: winner = 1, steps = 4\n",
      "05:42:56 [INFO] train episode 3949: winner = 1, steps = 4\n",
      "05:43:06 [INFO] train episode 3950: winner = -1, steps = 7\n",
      "05:43:08 [INFO] train episode 3951: winner = 0, steps = 8\n",
      "05:43:09 [INFO] train episode 3952: winner = 1, steps = 4\n",
      "05:43:12 [INFO] train episode 3953: winner = 1, steps = 6\n",
      "05:43:17 [INFO] train episode 3954: winner = 1, steps = 8\n",
      "05:43:23 [INFO] train episode 3955: winner = 1, steps = 6\n",
      "05:43:40 [INFO] train episode 3956: winner = 1, steps = 4\n",
      "05:43:46 [INFO] train episode 3957: winner = 0, steps = 8\n",
      "05:43:54 [INFO] train episode 3958: winner = 1, steps = 6\n",
      "05:44:03 [INFO] train episode 3959: winner = 1, steps = 8\n",
      "05:44:06 [INFO] train episode 3960: winner = 1, steps = 4\n",
      "05:44:15 [INFO] train episode 3961: winner = 0, steps = 8\n",
      "05:44:18 [INFO] train episode 3962: winner = 1, steps = 6\n",
      "05:44:24 [INFO] train episode 3963: winner = 0, steps = 8\n",
      "05:44:28 [INFO] train episode 3964: winner = 1, steps = 6\n",
      "05:44:28 [INFO] train episode 3965: winner = 1, steps = 4\n",
      "05:44:33 [INFO] train episode 3966: winner = 0, steps = 8\n",
      "05:44:36 [INFO] train episode 3967: winner = 1, steps = 6\n",
      "05:44:37 [INFO] train episode 3968: winner = 1, steps = 6\n",
      "05:44:38 [INFO] train episode 3969: winner = 1, steps = 6\n",
      "05:44:39 [INFO] train episode 3970: winner = 0, steps = 8\n",
      "05:44:41 [INFO] train episode 3971: winner = -1, steps = 7\n",
      "05:44:45 [INFO] train episode 3972: winner = 0, steps = 8\n",
      "05:44:52 [INFO] train episode 3973: winner = 0, steps = 8\n",
      "05:44:57 [INFO] train episode 3974: winner = 1, steps = 4\n",
      "05:45:02 [INFO] train episode 3975: winner = 1, steps = 6\n",
      "05:45:08 [INFO] train episode 3976: winner = -1, steps = 7\n",
      "05:45:10 [INFO] train episode 3977: winner = 1, steps = 4\n",
      "05:45:12 [INFO] train episode 3978: winner = 1, steps = 6\n",
      "05:45:22 [INFO] train episode 3979: winner = -1, steps = 5\n",
      "05:45:26 [INFO] train episode 3980: winner = 0, steps = 8\n",
      "05:45:30 [INFO] train episode 3981: winner = 1, steps = 6\n",
      "05:45:31 [INFO] train episode 3982: winner = 0, steps = 8\n",
      "05:45:34 [INFO] train episode 3983: winner = -1, steps = 7\n",
      "05:45:34 [INFO] train episode 3984: winner = 1, steps = 6\n",
      "05:45:40 [INFO] train episode 3985: winner = 0, steps = 8\n",
      "05:45:41 [INFO] train episode 3986: winner = -1, steps = 7\n",
      "05:45:41 [INFO] train episode 3987: winner = 1, steps = 6\n",
      "05:45:44 [INFO] train episode 3988: winner = -1, steps = 5\n",
      "05:45:48 [INFO] train episode 3989: winner = 0, steps = 8\n",
      "05:45:51 [INFO] train episode 3990: winner = -1, steps = 5\n",
      "05:45:52 [INFO] train episode 3991: winner = -1, steps = 5\n",
      "05:45:55 [INFO] train episode 3992: winner = 0, steps = 8\n",
      "05:45:56 [INFO] train episode 3993: winner = 0, steps = 8\n",
      "05:46:03 [INFO] train episode 3994: winner = 0, steps = 8\n",
      "05:46:03 [INFO] train episode 3995: winner = 0, steps = 8\n",
      "05:46:05 [INFO] train episode 3996: winner = 1, steps = 4\n",
      "05:46:06 [INFO] train episode 3997: winner = 1, steps = 4\n",
      "05:46:07 [INFO] train episode 3998: winner = -1, steps = 7\n",
      "05:46:10 [INFO] train episode 3999: winner = 1, steps = 4\n",
      "05:46:18 [INFO] train episode 4000: winner = 1, steps = 8\n",
      "05:46:18 [INFO] train episode 4001: winner = 1, steps = 4\n",
      "05:46:19 [INFO] train episode 4002: winner = 1, steps = 4\n",
      "05:46:22 [INFO] train episode 4003: winner = -1, steps = 7\n",
      "05:46:22 [INFO] train episode 4004: winner = 1, steps = 6\n",
      "05:46:23 [INFO] train episode 4005: winner = 1, steps = 4\n",
      "05:46:24 [INFO] train episode 4006: winner = 0, steps = 8\n",
      "05:46:25 [INFO] train episode 4007: winner = -1, steps = 7\n",
      "05:46:27 [INFO] train episode 4008: winner = 1, steps = 6\n",
      "05:46:27 [INFO] train episode 4009: winner = 1, steps = 6\n",
      "05:46:27 [INFO] train episode 4010: winner = 0, steps = 8\n",
      "05:46:28 [INFO] train episode 4011: winner = 1, steps = 6\n",
      "05:46:28 [INFO] train episode 4012: winner = 1, steps = 6\n",
      "05:46:28 [INFO] train episode 4013: winner = 1, steps = 4\n",
      "05:46:32 [INFO] train episode 4014: winner = 0, steps = 8\n",
      "05:46:33 [INFO] train episode 4015: winner = 0, steps = 8\n",
      "05:46:34 [INFO] train episode 4016: winner = 1, steps = 6\n",
      "05:46:34 [INFO] train episode 4017: winner = 0, steps = 8\n",
      "05:46:39 [INFO] train episode 4018: winner = 0, steps = 8\n",
      "05:46:41 [INFO] train episode 4019: winner = 0, steps = 8\n",
      "05:46:41 [INFO] train episode 4020: winner = 0, steps = 8\n",
      "05:46:41 [INFO] train episode 4021: winner = 1, steps = 4\n",
      "05:46:43 [INFO] train episode 4022: winner = 1, steps = 6\n",
      "05:46:45 [INFO] train episode 4023: winner = -1, steps = 7\n",
      "05:46:47 [INFO] train episode 4024: winner = 0, steps = 8\n",
      "05:46:48 [INFO] train episode 4025: winner = -1, steps = 5\n",
      "05:46:49 [INFO] train episode 4026: winner = 1, steps = 6\n",
      "05:46:49 [INFO] train episode 4027: winner = 1, steps = 4\n",
      "05:46:49 [INFO] train episode 4028: winner = 1, steps = 4\n",
      "05:46:50 [INFO] train episode 4029: winner = 1, steps = 6\n",
      "05:46:51 [INFO] train episode 4030: winner = 1, steps = 4\n",
      "05:46:51 [INFO] train episode 4031: winner = 1, steps = 4\n",
      "05:46:54 [INFO] train episode 4032: winner = 0, steps = 8\n",
      "05:47:00 [INFO] train episode 4033: winner = 1, steps = 6\n",
      "05:47:00 [INFO] train episode 4034: winner = 1, steps = 4\n",
      "05:47:02 [INFO] train episode 4035: winner = 1, steps = 4\n",
      "05:47:07 [INFO] train episode 4036: winner = 0, steps = 8\n",
      "05:47:09 [INFO] train episode 4037: winner = 0, steps = 8\n",
      "05:47:09 [INFO] train episode 4038: winner = 1, steps = 6\n",
      "05:47:09 [INFO] train episode 4039: winner = 1, steps = 6\n",
      "05:47:11 [INFO] train episode 4040: winner = 1, steps = 8\n",
      "05:47:11 [INFO] train episode 4041: winner = 1, steps = 4\n",
      "05:47:11 [INFO] train episode 4042: winner = 1, steps = 4\n",
      "05:47:11 [INFO] train episode 4043: winner = 1, steps = 6\n",
      "05:47:16 [INFO] train episode 4044: winner = 0, steps = 8\n",
      "05:47:18 [INFO] train episode 4045: winner = 0, steps = 8\n",
      "05:47:19 [INFO] train episode 4046: winner = 1, steps = 6\n",
      "05:47:19 [INFO] train episode 4047: winner = 1, steps = 6\n",
      "05:47:19 [INFO] train episode 4048: winner = 0, steps = 8\n",
      "05:47:20 [INFO] train episode 4049: winner = 1, steps = 6\n",
      "05:47:22 [INFO] train episode 4050: winner = 0, steps = 8\n",
      "05:47:22 [INFO] train episode 4051: winner = 1, steps = 4\n",
      "05:47:24 [INFO] train episode 4052: winner = 0, steps = 8\n",
      "05:47:24 [INFO] train episode 4053: winner = 0, steps = 8\n",
      "05:47:24 [INFO] train episode 4054: winner = -1, steps = 7\n",
      "05:47:24 [INFO] train episode 4055: winner = 1, steps = 4\n",
      "05:47:25 [INFO] train episode 4056: winner = 1, steps = 6\n",
      "05:47:27 [INFO] train episode 4057: winner = 1, steps = 6\n",
      "05:47:29 [INFO] train episode 4058: winner = 0, steps = 8\n",
      "05:47:31 [INFO] train episode 4059: winner = 0, steps = 8\n",
      "05:47:31 [INFO] train episode 4060: winner = 1, steps = 4\n",
      "05:47:35 [INFO] train episode 4061: winner = 0, steps = 8\n",
      "05:47:35 [INFO] train episode 4062: winner = -1, steps = 5\n",
      "05:47:36 [INFO] train episode 4063: winner = 1, steps = 6\n",
      "05:47:36 [INFO] test episode 4063:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:47:58 [INFO] step 0：player 1, action (2, 0)\n",
      "+++\n",
      "+++\n",
      "o++\n",
      "05:48:17 [INFO] step 1：player -1, action (1, 1)\n",
      "+++\n",
      "+x+\n",
      "o++\n",
      "05:48:31 [INFO] step 2：player 1, action (2, 2)\n",
      "+++\n",
      "+x+\n",
      "o+o\n",
      "05:48:35 [INFO] step 3：player -1, action (0, 0)\n",
      "x++\n",
      "+x+\n",
      "o+o\n",
      "05:48:36 [INFO] step 4：player 1, action (2, 1)\n",
      "x++\n",
      "+x+\n",
      "ooo\n",
      "05:48:36 [INFO] test episode 4063: winner = 1, steps = 4\n",
      "05:49:13 [INFO] train episode 4064: winner = 0, steps = 8\n",
      "05:49:20 [INFO] train episode 4065: winner = 1, steps = 6\n",
      "05:49:44 [INFO] train episode 4066: winner = 1, steps = 6\n",
      "05:50:15 [INFO] train episode 4067: winner = 1, steps = 6\n",
      "05:50:38 [INFO] train episode 4068: winner = 1, steps = 6\n",
      "05:50:55 [INFO] train episode 4069: winner = -1, steps = 7\n",
      "05:51:13 [INFO] train episode 4070: winner = 0, steps = 8\n",
      "05:51:44 [INFO] train episode 4071: winner = 0, steps = 8\n",
      "05:52:01 [INFO] train episode 4072: winner = -1, steps = 7\n",
      "05:52:23 [INFO] train episode 4073: winner = 1, steps = 8\n",
      "05:52:27 [INFO] train episode 4074: winner = 1, steps = 4\n",
      "05:52:31 [INFO] train episode 4075: winner = 1, steps = 6\n",
      "05:52:43 [INFO] train episode 4076: winner = -1, steps = 7\n",
      "05:52:46 [INFO] train episode 4077: winner = 0, steps = 8\n",
      "05:52:47 [INFO] train episode 4078: winner = 0, steps = 8\n",
      "05:52:54 [INFO] train episode 4079: winner = -1, steps = 7\n",
      "05:52:54 [INFO] train episode 4080: winner = 0, steps = 8\n",
      "05:52:58 [INFO] train episode 4081: winner = -1, steps = 7\n",
      "05:52:59 [INFO] train episode 4082: winner = -1, steps = 7\n",
      "05:53:20 [INFO] train episode 4083: winner = 1, steps = 6\n",
      "05:53:22 [INFO] train episode 4084: winner = 1, steps = 4\n",
      "05:53:29 [INFO] train episode 4085: winner = 1, steps = 6\n",
      "05:53:35 [INFO] train episode 4086: winner = 1, steps = 4\n",
      "05:53:43 [INFO] train episode 4087: winner = -1, steps = 7\n",
      "05:53:46 [INFO] train episode 4088: winner = 1, steps = 4\n",
      "05:53:47 [INFO] train episode 4089: winner = 1, steps = 6\n",
      "05:53:48 [INFO] train episode 4090: winner = 1, steps = 4\n",
      "05:53:50 [INFO] train episode 4091: winner = 1, steps = 6\n",
      "05:53:55 [INFO] train episode 4092: winner = 0, steps = 8\n",
      "05:53:59 [INFO] train episode 4093: winner = 1, steps = 4\n",
      "05:54:05 [INFO] train episode 4094: winner = 1, steps = 6\n",
      "05:54:27 [INFO] train episode 4095: winner = 0, steps = 8\n",
      "05:54:31 [INFO] train episode 4096: winner = 0, steps = 8\n",
      "05:54:32 [INFO] train episode 4097: winner = 1, steps = 8\n",
      "05:54:35 [INFO] train episode 4098: winner = 1, steps = 6\n",
      "05:54:41 [INFO] train episode 4099: winner = -1, steps = 5\n",
      "05:54:46 [INFO] train episode 4100: winner = 1, steps = 4\n",
      "05:54:48 [INFO] train episode 4101: winner = 0, steps = 8\n",
      "05:54:50 [INFO] train episode 4102: winner = -1, steps = 7\n",
      "05:54:53 [INFO] train episode 4103: winner = 1, steps = 4\n",
      "05:55:05 [INFO] train episode 4104: winner = -1, steps = 7\n",
      "05:55:08 [INFO] train episode 4105: winner = 1, steps = 4\n",
      "05:55:10 [INFO] train episode 4106: winner = 1, steps = 6\n",
      "05:55:17 [INFO] train episode 4107: winner = 1, steps = 6\n",
      "05:55:21 [INFO] train episode 4108: winner = 1, steps = 6\n",
      "05:55:28 [INFO] train episode 4109: winner = 1, steps = 4\n",
      "05:55:31 [INFO] train episode 4110: winner = 0, steps = 8\n",
      "05:55:46 [INFO] train episode 4111: winner = 1, steps = 6\n",
      "05:55:47 [INFO] train episode 4112: winner = 1, steps = 4\n",
      "05:55:48 [INFO] train episode 4113: winner = 1, steps = 6\n",
      "05:55:54 [INFO] train episode 4114: winner = 0, steps = 8\n",
      "05:56:01 [INFO] train episode 4115: winner = 0, steps = 8\n",
      "05:56:02 [INFO] train episode 4116: winner = 1, steps = 6\n",
      "05:56:04 [INFO] train episode 4117: winner = 1, steps = 4\n",
      "05:56:04 [INFO] train episode 4118: winner = -1, steps = 5\n",
      "05:56:11 [INFO] train episode 4119: winner = 1, steps = 6\n",
      "05:56:14 [INFO] train episode 4120: winner = 1, steps = 6\n",
      "05:56:19 [INFO] train episode 4121: winner = 1, steps = 6\n",
      "05:56:23 [INFO] train episode 4122: winner = 0, steps = 8\n",
      "05:56:26 [INFO] train episode 4123: winner = 0, steps = 8\n",
      "05:56:28 [INFO] train episode 4124: winner = 0, steps = 8\n",
      "05:56:32 [INFO] train episode 4125: winner = 1, steps = 4\n",
      "05:56:32 [INFO] train episode 4126: winner = 1, steps = 6\n",
      "05:56:35 [INFO] train episode 4127: winner = 0, steps = 8\n",
      "05:56:36 [INFO] train episode 4128: winner = 0, steps = 8\n",
      "05:56:36 [INFO] train episode 4129: winner = 1, steps = 6\n",
      "05:56:37 [INFO] train episode 4130: winner = 1, steps = 6\n",
      "05:56:38 [INFO] train episode 4131: winner = 1, steps = 6\n",
      "05:56:42 [INFO] train episode 4132: winner = 0, steps = 8\n",
      "05:56:43 [INFO] train episode 4133: winner = 1, steps = 6\n",
      "05:56:43 [INFO] train episode 4134: winner = 1, steps = 6\n",
      "05:56:46 [INFO] train episode 4135: winner = 1, steps = 6\n",
      "05:56:46 [INFO] train episode 4136: winner = 0, steps = 8\n",
      "05:56:47 [INFO] train episode 4137: winner = 0, steps = 8\n",
      "05:56:49 [INFO] train episode 4138: winner = 0, steps = 8\n",
      "05:56:51 [INFO] train episode 4139: winner = 1, steps = 6\n",
      "05:56:53 [INFO] train episode 4140: winner = 1, steps = 4\n",
      "05:56:56 [INFO] train episode 4141: winner = 1, steps = 6\n",
      "05:56:57 [INFO] train episode 4142: winner = 0, steps = 8\n",
      "05:57:00 [INFO] train episode 4143: winner = 1, steps = 6\n",
      "05:57:04 [INFO] train episode 4144: winner = 0, steps = 8\n",
      "05:57:05 [INFO] train episode 4145: winner = 1, steps = 6\n",
      "05:57:08 [INFO] train episode 4146: winner = 1, steps = 4\n",
      "05:57:08 [INFO] train episode 4147: winner = 1, steps = 6\n",
      "05:57:10 [INFO] train episode 4148: winner = 1, steps = 6\n",
      "05:57:13 [INFO] train episode 4149: winner = 0, steps = 8\n",
      "05:57:17 [INFO] train episode 4150: winner = 0, steps = 8\n",
      "05:57:18 [INFO] train episode 4151: winner = 0, steps = 8\n",
      "05:57:18 [INFO] train episode 4152: winner = 1, steps = 4\n",
      "05:57:19 [INFO] train episode 4153: winner = -1, steps = 5\n",
      "05:57:19 [INFO] train episode 4154: winner = -1, steps = 5\n",
      "05:57:19 [INFO] train episode 4155: winner = 1, steps = 6\n",
      "05:57:19 [INFO] train episode 4156: winner = 0, steps = 8\n",
      "05:57:23 [INFO] train episode 4157: winner = 1, steps = 6\n",
      "05:57:23 [INFO] train episode 4158: winner = 1, steps = 6\n",
      "05:57:24 [INFO] train episode 4159: winner = 0, steps = 8\n",
      "05:57:24 [INFO] train episode 4160: winner = 1, steps = 6\n",
      "05:57:25 [INFO] train episode 4161: winner = 1, steps = 4\n",
      "05:57:25 [INFO] train episode 4162: winner = 0, steps = 8\n",
      "05:57:25 [INFO] train episode 4163: winner = 1, steps = 4\n",
      "05:57:26 [INFO] train episode 4164: winner = 1, steps = 4\n",
      "05:57:26 [INFO] train episode 4165: winner = 1, steps = 6\n",
      "05:57:28 [INFO] train episode 4166: winner = 1, steps = 6\n",
      "05:57:29 [INFO] train episode 4167: winner = 0, steps = 8\n",
      "05:57:30 [INFO] train episode 4168: winner = 1, steps = 6\n",
      "05:57:31 [INFO] train episode 4169: winner = 1, steps = 6\n",
      "05:57:33 [INFO] train episode 4170: winner = 1, steps = 6\n",
      "05:57:35 [INFO] train episode 4171: winner = 1, steps = 6\n",
      "05:57:41 [INFO] train episode 4172: winner = 1, steps = 8\n",
      "05:57:42 [INFO] train episode 4173: winner = 1, steps = 4\n",
      "05:57:42 [INFO] train episode 4174: winner = 0, steps = 8\n",
      "05:57:43 [INFO] train episode 4175: winner = 1, steps = 6\n",
      "05:57:43 [INFO] train episode 4176: winner = 1, steps = 6\n",
      "05:57:44 [INFO] train episode 4177: winner = 1, steps = 4\n",
      "05:57:45 [INFO] train episode 4178: winner = 1, steps = 6\n",
      "05:57:48 [INFO] train episode 4179: winner = 0, steps = 8\n",
      "05:57:48 [INFO] train episode 4180: winner = 1, steps = 4\n",
      "05:57:55 [INFO] train episode 4181: winner = 0, steps = 8\n",
      "05:57:55 [INFO] train episode 4182: winner = 1, steps = 4\n",
      "05:57:56 [INFO] train episode 4183: winner = 1, steps = 6\n",
      "05:57:57 [INFO] train episode 4184: winner = 1, steps = 6\n",
      "05:57:59 [INFO] train episode 4185: winner = 0, steps = 8\n",
      "05:58:01 [INFO] train episode 4186: winner = 0, steps = 8\n",
      "05:58:03 [INFO] train episode 4187: winner = 1, steps = 6\n",
      "05:58:04 [INFO] train episode 4188: winner = 0, steps = 8\n",
      "05:58:05 [INFO] train episode 4189: winner = 0, steps = 8\n",
      "05:58:06 [INFO] train episode 4190: winner = 1, steps = 6\n",
      "05:58:07 [INFO] train episode 4191: winner = 0, steps = 8\n",
      "05:58:08 [INFO] train episode 4192: winner = 1, steps = 6\n",
      "05:58:08 [INFO] train episode 4193: winner = 0, steps = 8\n",
      "05:58:08 [INFO] train episode 4194: winner = 1, steps = 6\n",
      "05:58:08 [INFO] train episode 4195: winner = 1, steps = 4\n",
      "05:58:10 [INFO] train episode 4196: winner = -1, steps = 7\n",
      "05:58:11 [INFO] train episode 4197: winner = 0, steps = 8\n",
      "05:58:14 [INFO] train episode 4198: winner = 1, steps = 6\n",
      "05:58:15 [INFO] train episode 4199: winner = 0, steps = 8\n",
      "05:58:15 [INFO] test episode 4199:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "05:58:36 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "05:58:52 [INFO] step 1：player -1, action (2, 0)\n",
      "+++\n",
      "+o+\n",
      "x++\n",
      "05:59:08 [INFO] step 2：player 1, action (2, 1)\n",
      "+++\n",
      "+o+\n",
      "xo+\n",
      "05:59:11 [INFO] step 3：player -1, action (0, 1)\n",
      "+x+\n",
      "+o+\n",
      "xo+\n",
      "05:59:12 [INFO] step 4：player 1, action (0, 0)\n",
      "ox+\n",
      "+o+\n",
      "xo+\n",
      "05:59:12 [INFO] step 5：player -1, action (2, 2)\n",
      "ox+\n",
      "+o+\n",
      "xox\n",
      "05:59:12 [INFO] step 6：player 1, action (1, 2)\n",
      "ox+\n",
      "+oo\n",
      "xox\n",
      "05:59:13 [INFO] step 7：player -1, action (1, 0)\n",
      "ox+\n",
      "xoo\n",
      "xox\n",
      "05:59:13 [INFO] step 8：player 1, action (0, 2)\n",
      "oxo\n",
      "xoo\n",
      "xox\n",
      "05:59:13 [INFO] test episode 4199: winner = 0, steps = 8\n",
      "05:59:46 [INFO] train episode 4200: winner = 0, steps = 8\n",
      "06:00:10 [INFO] train episode 4201: winner = 1, steps = 4\n",
      "06:00:38 [INFO] train episode 4202: winner = 1, steps = 6\n",
      "06:01:21 [INFO] train episode 4203: winner = -1, steps = 5\n",
      "06:01:23 [INFO] train episode 4204: winner = 1, steps = 6\n",
      "06:01:36 [INFO] train episode 4205: winner = 1, steps = 4\n",
      "06:02:09 [INFO] train episode 4206: winner = 0, steps = 8\n",
      "06:02:36 [INFO] train episode 4207: winner = -1, steps = 7\n",
      "06:02:47 [INFO] train episode 4208: winner = 1, steps = 6\n",
      "06:03:03 [INFO] train episode 4209: winner = 1, steps = 8\n",
      "06:03:30 [INFO] train episode 4210: winner = 1, steps = 4\n",
      "06:03:35 [INFO] train episode 4211: winner = 1, steps = 4\n",
      "06:03:44 [INFO] train episode 4212: winner = 1, steps = 6\n",
      "06:03:49 [INFO] train episode 4213: winner = 1, steps = 6\n",
      "06:03:54 [INFO] train episode 4214: winner = 1, steps = 4\n",
      "06:04:01 [INFO] train episode 4215: winner = 1, steps = 6\n",
      "06:04:01 [INFO] train episode 4216: winner = 1, steps = 4\n",
      "06:04:09 [INFO] train episode 4217: winner = 0, steps = 8\n",
      "06:04:11 [INFO] train episode 4218: winner = 1, steps = 4\n",
      "06:04:12 [INFO] train episode 4219: winner = 1, steps = 6\n",
      "06:04:19 [INFO] train episode 4220: winner = -1, steps = 7\n",
      "06:04:21 [INFO] train episode 4221: winner = 1, steps = 6\n",
      "06:04:22 [INFO] train episode 4222: winner = 1, steps = 4\n",
      "06:04:27 [INFO] train episode 4223: winner = 0, steps = 8\n",
      "06:04:27 [INFO] train episode 4224: winner = 1, steps = 6\n",
      "06:04:30 [INFO] train episode 4225: winner = 1, steps = 4\n",
      "06:04:33 [INFO] train episode 4226: winner = 1, steps = 4\n",
      "06:04:38 [INFO] train episode 4227: winner = 1, steps = 6\n",
      "06:04:49 [INFO] train episode 4228: winner = 0, steps = 8\n",
      "06:05:00 [INFO] train episode 4229: winner = -1, steps = 7\n",
      "06:05:01 [INFO] train episode 4230: winner = -1, steps = 7\n",
      "06:05:05 [INFO] train episode 4231: winner = -1, steps = 7\n",
      "06:05:15 [INFO] train episode 4232: winner = 1, steps = 4\n",
      "06:05:15 [INFO] train episode 4233: winner = 1, steps = 4\n",
      "06:05:17 [INFO] train episode 4234: winner = 1, steps = 4\n",
      "06:05:18 [INFO] train episode 4235: winner = 1, steps = 4\n",
      "06:05:25 [INFO] train episode 4236: winner = 1, steps = 6\n",
      "06:05:26 [INFO] train episode 4237: winner = 0, steps = 8\n",
      "06:05:31 [INFO] train episode 4238: winner = 0, steps = 8\n",
      "06:05:34 [INFO] train episode 4239: winner = 1, steps = 4\n",
      "06:05:35 [INFO] train episode 4240: winner = 1, steps = 4\n",
      "06:05:38 [INFO] train episode 4241: winner = 1, steps = 6\n",
      "06:05:42 [INFO] train episode 4242: winner = -1, steps = 5\n",
      "06:05:43 [INFO] train episode 4243: winner = 1, steps = 4\n",
      "06:05:47 [INFO] train episode 4244: winner = -1, steps = 5\n",
      "06:05:56 [INFO] train episode 4245: winner = 0, steps = 8\n",
      "06:05:57 [INFO] train episode 4246: winner = 1, steps = 4\n",
      "06:06:00 [INFO] train episode 4247: winner = 1, steps = 6\n",
      "06:06:06 [INFO] train episode 4248: winner = 0, steps = 8\n",
      "06:06:09 [INFO] train episode 4249: winner = 1, steps = 6\n",
      "06:06:12 [INFO] train episode 4250: winner = 1, steps = 4\n",
      "06:06:20 [INFO] train episode 4251: winner = 0, steps = 8\n",
      "06:06:25 [INFO] train episode 4252: winner = 1, steps = 6\n",
      "06:06:25 [INFO] train episode 4253: winner = 1, steps = 4\n",
      "06:06:27 [INFO] train episode 4254: winner = 1, steps = 6\n",
      "06:06:27 [INFO] train episode 4255: winner = 1, steps = 4\n",
      "06:06:27 [INFO] train episode 4256: winner = 1, steps = 6\n",
      "06:06:33 [INFO] train episode 4257: winner = 0, steps = 8\n",
      "06:06:35 [INFO] train episode 4258: winner = 1, steps = 6\n",
      "06:06:35 [INFO] train episode 4259: winner = -1, steps = 7\n",
      "06:06:35 [INFO] train episode 4260: winner = 0, steps = 8\n",
      "06:06:39 [INFO] train episode 4261: winner = 1, steps = 8\n",
      "06:06:39 [INFO] train episode 4262: winner = 1, steps = 6\n",
      "06:06:40 [INFO] train episode 4263: winner = 0, steps = 8\n",
      "06:06:47 [INFO] train episode 4264: winner = 1, steps = 6\n",
      "06:06:47 [INFO] train episode 4265: winner = 0, steps = 8\n",
      "06:06:49 [INFO] train episode 4266: winner = 0, steps = 8\n",
      "06:06:55 [INFO] train episode 4267: winner = 0, steps = 8\n",
      "06:06:55 [INFO] train episode 4268: winner = 1, steps = 4\n",
      "06:06:57 [INFO] train episode 4269: winner = 1, steps = 4\n",
      "06:07:00 [INFO] train episode 4270: winner = 1, steps = 6\n",
      "06:07:00 [INFO] train episode 4271: winner = 1, steps = 6\n",
      "06:07:04 [INFO] train episode 4272: winner = 0, steps = 8\n",
      "06:07:08 [INFO] train episode 4273: winner = 0, steps = 8\n",
      "06:07:10 [INFO] train episode 4274: winner = 1, steps = 4\n",
      "06:07:22 [INFO] train episode 4275: winner = 0, steps = 8\n",
      "06:07:23 [INFO] train episode 4276: winner = 0, steps = 8\n",
      "06:07:25 [INFO] train episode 4277: winner = 1, steps = 6\n",
      "06:07:27 [INFO] train episode 4278: winner = 1, steps = 6\n",
      "06:07:31 [INFO] train episode 4279: winner = 1, steps = 4\n",
      "06:07:34 [INFO] train episode 4280: winner = 1, steps = 6\n",
      "06:07:37 [INFO] train episode 4281: winner = 1, steps = 4\n",
      "06:07:40 [INFO] train episode 4282: winner = 0, steps = 8\n",
      "06:07:40 [INFO] train episode 4283: winner = 0, steps = 8\n",
      "06:07:41 [INFO] train episode 4284: winner = 1, steps = 4\n",
      "06:07:43 [INFO] train episode 4285: winner = 1, steps = 6\n",
      "06:07:45 [INFO] train episode 4286: winner = -1, steps = 5\n",
      "06:07:48 [INFO] train episode 4287: winner = 1, steps = 4\n",
      "06:07:48 [INFO] train episode 4288: winner = 1, steps = 6\n",
      "06:07:50 [INFO] train episode 4289: winner = -1, steps = 5\n",
      "06:07:52 [INFO] train episode 4290: winner = 1, steps = 6\n",
      "06:07:54 [INFO] train episode 4291: winner = 0, steps = 8\n",
      "06:07:55 [INFO] train episode 4292: winner = 1, steps = 6\n",
      "06:07:56 [INFO] train episode 4293: winner = 1, steps = 6\n",
      "06:07:58 [INFO] train episode 4294: winner = 0, steps = 8\n",
      "06:08:00 [INFO] train episode 4295: winner = 0, steps = 8\n",
      "06:08:04 [INFO] train episode 4296: winner = 0, steps = 8\n",
      "06:08:06 [INFO] train episode 4297: winner = -1, steps = 5\n",
      "06:08:06 [INFO] train episode 4298: winner = -1, steps = 7\n",
      "06:08:07 [INFO] train episode 4299: winner = 1, steps = 6\n",
      "06:08:11 [INFO] train episode 4300: winner = 1, steps = 6\n",
      "06:08:11 [INFO] train episode 4301: winner = 1, steps = 4\n",
      "06:08:11 [INFO] train episode 4302: winner = 1, steps = 4\n",
      "06:08:12 [INFO] train episode 4303: winner = 0, steps = 8\n",
      "06:08:14 [INFO] train episode 4304: winner = 1, steps = 8\n",
      "06:08:15 [INFO] train episode 4305: winner = 1, steps = 6\n",
      "06:08:15 [INFO] train episode 4306: winner = 1, steps = 4\n",
      "06:08:15 [INFO] train episode 4307: winner = 1, steps = 4\n",
      "06:08:15 [INFO] train episode 4308: winner = 1, steps = 6\n",
      "06:08:17 [INFO] train episode 4309: winner = 1, steps = 6\n",
      "06:08:19 [INFO] train episode 4310: winner = 0, steps = 8\n",
      "06:08:21 [INFO] train episode 4311: winner = 0, steps = 8\n",
      "06:08:23 [INFO] train episode 4312: winner = 1, steps = 6\n",
      "06:08:23 [INFO] train episode 4313: winner = 1, steps = 4\n",
      "06:08:28 [INFO] train episode 4314: winner = 0, steps = 8\n",
      "06:08:29 [INFO] train episode 4315: winner = 1, steps = 6\n",
      "06:08:30 [INFO] train episode 4316: winner = 1, steps = 6\n",
      "06:08:31 [INFO] train episode 4317: winner = 1, steps = 6\n",
      "06:08:32 [INFO] train episode 4318: winner = 1, steps = 8\n",
      "06:08:33 [INFO] train episode 4319: winner = 0, steps = 8\n",
      "06:08:34 [INFO] train episode 4320: winner = 1, steps = 8\n",
      "06:08:35 [INFO] train episode 4321: winner = -1, steps = 5\n",
      "06:08:36 [INFO] train episode 4322: winner = 1, steps = 6\n",
      "06:08:36 [INFO] train episode 4323: winner = 1, steps = 8\n",
      "06:08:37 [INFO] train episode 4324: winner = 1, steps = 4\n",
      "06:08:37 [INFO] train episode 4325: winner = 1, steps = 6\n",
      "06:08:37 [INFO] train episode 4326: winner = 1, steps = 4\n",
      "06:08:38 [INFO] train episode 4327: winner = 1, steps = 4\n",
      "06:08:41 [INFO] train episode 4328: winner = 0, steps = 8\n",
      "06:08:42 [INFO] train episode 4329: winner = 0, steps = 8\n",
      "06:08:42 [INFO] train episode 4330: winner = 1, steps = 4\n",
      "06:08:42 [INFO] train episode 4331: winner = 1, steps = 6\n",
      "06:08:44 [INFO] train episode 4332: winner = -1, steps = 5\n",
      "06:08:44 [INFO] train episode 4333: winner = 1, steps = 4\n",
      "06:08:45 [INFO] train episode 4334: winner = 0, steps = 8\n",
      "06:08:46 [INFO] train episode 4335: winner = -1, steps = 5\n",
      "06:08:47 [INFO] train episode 4336: winner = 0, steps = 8\n",
      "06:08:48 [INFO] train episode 4337: winner = 1, steps = 4\n",
      "06:08:49 [INFO] train episode 4338: winner = 1, steps = 4\n",
      "06:08:49 [INFO] train episode 4339: winner = 0, steps = 8\n",
      "06:08:51 [INFO] train episode 4340: winner = 0, steps = 8\n",
      "06:08:52 [INFO] train episode 4341: winner = 0, steps = 8\n",
      "06:08:52 [INFO] test episode 4341:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "06:09:14 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "06:09:33 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+++\n",
      "++o\n",
      "06:09:48 [INFO] step 2：player 1, action (0, 2)\n",
      "+xo\n",
      "+++\n",
      "++o\n",
      "06:09:52 [INFO] step 3：player -1, action (2, 0)\n",
      "+xo\n",
      "+++\n",
      "x+o\n",
      "06:09:54 [INFO] step 4：player 1, action (1, 2)\n",
      "+xo\n",
      "++o\n",
      "x+o\n",
      "06:09:54 [INFO] test episode 4341: winner = 1, steps = 4\n",
      "06:10:19 [INFO] train episode 4342: winner = 1, steps = 6\n",
      "06:10:48 [INFO] train episode 4343: winner = 1, steps = 6\n",
      "06:11:18 [INFO] train episode 4344: winner = 0, steps = 8\n",
      "06:11:47 [INFO] train episode 4345: winner = -1, steps = 5\n",
      "06:12:27 [INFO] train episode 4346: winner = -1, steps = 5\n",
      "06:12:40 [INFO] train episode 4347: winner = -1, steps = 7\n",
      "06:12:54 [INFO] train episode 4348: winner = -1, steps = 5\n",
      "06:13:00 [INFO] train episode 4349: winner = 1, steps = 6\n",
      "06:13:04 [INFO] train episode 4350: winner = 1, steps = 6\n",
      "06:13:26 [INFO] train episode 4351: winner = 1, steps = 6\n",
      "06:14:02 [INFO] train episode 4352: winner = 1, steps = 8\n",
      "06:14:06 [INFO] train episode 4353: winner = -1, steps = 5\n",
      "06:14:21 [INFO] train episode 4354: winner = 0, steps = 8\n",
      "06:14:28 [INFO] train episode 4355: winner = -1, steps = 5\n",
      "06:14:33 [INFO] train episode 4356: winner = 1, steps = 4\n",
      "06:14:43 [INFO] train episode 4357: winner = 0, steps = 8\n",
      "06:14:45 [INFO] train episode 4358: winner = -1, steps = 5\n",
      "06:14:59 [INFO] train episode 4359: winner = 1, steps = 6\n",
      "06:15:03 [INFO] train episode 4360: winner = 1, steps = 6\n",
      "06:15:05 [INFO] train episode 4361: winner = 1, steps = 4\n",
      "06:15:11 [INFO] train episode 4362: winner = 1, steps = 6\n",
      "06:15:14 [INFO] train episode 4363: winner = 1, steps = 4\n",
      "06:15:19 [INFO] train episode 4364: winner = -1, steps = 5\n",
      "06:15:20 [INFO] train episode 4365: winner = 1, steps = 6\n",
      "06:15:28 [INFO] train episode 4366: winner = 0, steps = 8\n",
      "06:15:32 [INFO] train episode 4367: winner = 1, steps = 6\n",
      "06:15:33 [INFO] train episode 4368: winner = 1, steps = 4\n",
      "06:15:37 [INFO] train episode 4369: winner = -1, steps = 7\n",
      "06:15:42 [INFO] train episode 4370: winner = -1, steps = 5\n",
      "06:15:50 [INFO] train episode 4371: winner = 0, steps = 8\n",
      "06:15:52 [INFO] train episode 4372: winner = 1, steps = 6\n",
      "06:15:57 [INFO] train episode 4373: winner = 1, steps = 6\n",
      "06:15:58 [INFO] train episode 4374: winner = 1, steps = 6\n",
      "06:16:01 [INFO] train episode 4375: winner = 0, steps = 8\n",
      "06:16:10 [INFO] train episode 4376: winner = -1, steps = 7\n",
      "06:16:12 [INFO] train episode 4377: winner = 1, steps = 4\n",
      "06:16:14 [INFO] train episode 4378: winner = 1, steps = 6\n",
      "06:16:19 [INFO] train episode 4379: winner = 1, steps = 6\n",
      "06:16:26 [INFO] train episode 4380: winner = 1, steps = 6\n",
      "06:16:28 [INFO] train episode 4381: winner = -1, steps = 5\n",
      "06:16:30 [INFO] train episode 4382: winner = 0, steps = 8\n",
      "06:16:31 [INFO] train episode 4383: winner = 1, steps = 6\n",
      "06:16:37 [INFO] train episode 4384: winner = 1, steps = 6\n",
      "06:16:42 [INFO] train episode 4385: winner = 1, steps = 4\n",
      "06:16:44 [INFO] train episode 4386: winner = 1, steps = 8\n",
      "06:16:52 [INFO] train episode 4387: winner = -1, steps = 7\n",
      "06:16:55 [INFO] train episode 4388: winner = -1, steps = 7\n",
      "06:17:01 [INFO] train episode 4389: winner = 1, steps = 6\n",
      "06:17:01 [INFO] train episode 4390: winner = 0, steps = 8\n",
      "06:17:04 [INFO] train episode 4391: winner = 1, steps = 6\n",
      "06:17:08 [INFO] train episode 4392: winner = 0, steps = 8\n",
      "06:17:11 [INFO] train episode 4393: winner = -1, steps = 7\n",
      "06:17:12 [INFO] train episode 4394: winner = 0, steps = 8\n",
      "06:17:17 [INFO] train episode 4395: winner = 0, steps = 8\n",
      "06:17:17 [INFO] train episode 4396: winner = 1, steps = 4\n",
      "06:17:18 [INFO] train episode 4397: winner = 1, steps = 4\n",
      "06:17:20 [INFO] train episode 4398: winner = 0, steps = 8\n",
      "06:17:21 [INFO] train episode 4399: winner = 1, steps = 4\n",
      "06:17:23 [INFO] train episode 4400: winner = 1, steps = 4\n",
      "06:17:23 [INFO] train episode 4401: winner = 1, steps = 6\n",
      "06:17:26 [INFO] train episode 4402: winner = 0, steps = 8\n",
      "06:17:32 [INFO] train episode 4403: winner = 0, steps = 8\n",
      "06:17:32 [INFO] train episode 4404: winner = 0, steps = 8\n",
      "06:17:32 [INFO] train episode 4405: winner = 0, steps = 8\n",
      "06:17:33 [INFO] train episode 4406: winner = 1, steps = 6\n",
      "06:17:34 [INFO] train episode 4407: winner = 1, steps = 6\n",
      "06:17:34 [INFO] train episode 4408: winner = 1, steps = 6\n",
      "06:17:37 [INFO] train episode 4409: winner = 1, steps = 6\n",
      "06:17:43 [INFO] train episode 4410: winner = -1, steps = 7\n",
      "06:17:45 [INFO] train episode 4411: winner = 1, steps = 6\n",
      "06:17:46 [INFO] train episode 4412: winner = 1, steps = 4\n",
      "06:17:46 [INFO] train episode 4413: winner = 0, steps = 8\n",
      "06:17:46 [INFO] train episode 4414: winner = 1, steps = 4\n",
      "06:17:50 [INFO] train episode 4415: winner = 1, steps = 6\n",
      "06:17:53 [INFO] train episode 4416: winner = 1, steps = 4\n",
      "06:17:54 [INFO] train episode 4417: winner = 1, steps = 6\n",
      "06:17:54 [INFO] train episode 4418: winner = 1, steps = 6\n",
      "06:17:57 [INFO] train episode 4419: winner = 0, steps = 8\n",
      "06:17:57 [INFO] train episode 4420: winner = 1, steps = 6\n",
      "06:18:01 [INFO] train episode 4421: winner = -1, steps = 5\n",
      "06:18:02 [INFO] train episode 4422: winner = 1, steps = 6\n",
      "06:18:06 [INFO] train episode 4423: winner = 0, steps = 8\n",
      "06:18:07 [INFO] train episode 4424: winner = 1, steps = 4\n",
      "06:18:08 [INFO] train episode 4425: winner = 0, steps = 8\n",
      "06:18:13 [INFO] train episode 4426: winner = 0, steps = 8\n",
      "06:18:15 [INFO] train episode 4427: winner = 0, steps = 8\n",
      "06:18:16 [INFO] train episode 4428: winner = 1, steps = 6\n",
      "06:18:18 [INFO] train episode 4429: winner = 0, steps = 8\n",
      "06:18:22 [INFO] train episode 4430: winner = 0, steps = 8\n",
      "06:18:23 [INFO] train episode 4431: winner = 1, steps = 8\n",
      "06:18:23 [INFO] train episode 4432: winner = 0, steps = 8\n",
      "06:18:24 [INFO] train episode 4433: winner = 1, steps = 4\n",
      "06:18:25 [INFO] train episode 4434: winner = 0, steps = 8\n",
      "06:18:25 [INFO] train episode 4435: winner = 1, steps = 6\n",
      "06:18:27 [INFO] train episode 4436: winner = 1, steps = 4\n",
      "06:18:27 [INFO] train episode 4437: winner = 1, steps = 6\n",
      "06:18:28 [INFO] train episode 4438: winner = 1, steps = 6\n",
      "06:18:30 [INFO] train episode 4439: winner = -1, steps = 7\n",
      "06:18:34 [INFO] train episode 4440: winner = 0, steps = 8\n",
      "06:18:36 [INFO] train episode 4441: winner = 1, steps = 4\n",
      "06:18:37 [INFO] train episode 4442: winner = 0, steps = 8\n",
      "06:18:39 [INFO] train episode 4443: winner = 0, steps = 8\n",
      "06:18:40 [INFO] train episode 4444: winner = -1, steps = 5\n",
      "06:18:41 [INFO] train episode 4445: winner = 1, steps = 4\n",
      "06:18:42 [INFO] train episode 4446: winner = 1, steps = 6\n",
      "06:18:42 [INFO] train episode 4447: winner = 1, steps = 6\n",
      "06:18:42 [INFO] train episode 4448: winner = 1, steps = 4\n",
      "06:18:42 [INFO] train episode 4449: winner = 0, steps = 8\n",
      "06:18:43 [INFO] train episode 4450: winner = 1, steps = 6\n",
      "06:18:43 [INFO] train episode 4451: winner = 1, steps = 4\n",
      "06:18:44 [INFO] train episode 4452: winner = -1, steps = 7\n",
      "06:18:47 [INFO] train episode 4453: winner = 0, steps = 8\n",
      "06:18:49 [INFO] train episode 4454: winner = 1, steps = 4\n",
      "06:18:50 [INFO] train episode 4455: winner = 1, steps = 4\n",
      "06:18:52 [INFO] train episode 4456: winner = 0, steps = 8\n",
      "06:18:53 [INFO] train episode 4457: winner = 0, steps = 8\n",
      "06:18:53 [INFO] train episode 4458: winner = 1, steps = 6\n",
      "06:18:54 [INFO] train episode 4459: winner = 1, steps = 8\n",
      "06:18:57 [INFO] train episode 4460: winner = 0, steps = 8\n",
      "06:18:57 [INFO] train episode 4461: winner = 1, steps = 4\n",
      "06:19:02 [INFO] train episode 4462: winner = 1, steps = 4\n",
      "06:19:05 [INFO] train episode 4463: winner = 0, steps = 8\n",
      "06:19:06 [INFO] train episode 4464: winner = -1, steps = 7\n",
      "06:19:07 [INFO] train episode 4465: winner = 1, steps = 6\n",
      "06:19:08 [INFO] train episode 4466: winner = 1, steps = 4\n",
      "06:19:09 [INFO] train episode 4467: winner = -1, steps = 7\n",
      "06:19:10 [INFO] train episode 4468: winner = 1, steps = 6\n",
      "06:19:12 [INFO] train episode 4469: winner = 0, steps = 8\n",
      "06:19:13 [INFO] train episode 4470: winner = 1, steps = 6\n",
      "06:19:13 [INFO] train episode 4471: winner = 0, steps = 8\n",
      "06:19:13 [INFO] train episode 4472: winner = 0, steps = 8\n",
      "06:19:14 [INFO] train episode 4473: winner = 1, steps = 6\n",
      "06:19:14 [INFO] train episode 4474: winner = 1, steps = 4\n",
      "06:19:14 [INFO] train episode 4475: winner = 0, steps = 8\n",
      "06:19:15 [INFO] train episode 4476: winner = 1, steps = 6\n",
      "06:19:16 [INFO] train episode 4477: winner = 1, steps = 6\n",
      "06:19:16 [INFO] train episode 4478: winner = 1, steps = 4\n",
      "06:19:16 [INFO] train episode 4479: winner = 1, steps = 6\n",
      "06:19:17 [INFO] train episode 4480: winner = 1, steps = 4\n",
      "06:19:17 [INFO] test episode 4480:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "06:19:39 [INFO] step 0：player 1, action (1, 2)\n",
      "+++\n",
      "++o\n",
      "+++\n",
      "06:19:58 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "++o\n",
      "+++\n",
      "06:20:13 [INFO] step 2：player 1, action (2, 2)\n",
      "++x\n",
      "++o\n",
      "++o\n",
      "06:20:20 [INFO] step 3：player -1, action (0, 0)\n",
      "x+x\n",
      "++o\n",
      "++o\n",
      "06:20:20 [INFO] step 4：player 1, action (1, 1)\n",
      "x+x\n",
      "+oo\n",
      "++o\n",
      "06:20:21 [INFO] step 5：player -1, action (0, 1)\n",
      "xxx\n",
      "+oo\n",
      "++o\n",
      "06:20:21 [INFO] test episode 4480: winner = -1, steps = 5\n",
      "06:20:48 [INFO] train episode 4481: winner = 1, steps = 6\n",
      "06:21:15 [INFO] train episode 4482: winner = 1, steps = 4\n",
      "06:21:45 [INFO] train episode 4483: winner = 1, steps = 6\n",
      "06:22:10 [INFO] train episode 4484: winner = 1, steps = 6\n",
      "06:22:38 [INFO] train episode 4485: winner = 0, steps = 8\n",
      "06:22:53 [INFO] train episode 4486: winner = 0, steps = 8\n",
      "06:23:00 [INFO] train episode 4487: winner = 0, steps = 8\n",
      "06:23:15 [INFO] train episode 4488: winner = 1, steps = 6\n",
      "06:23:27 [INFO] train episode 4489: winner = -1, steps = 5\n",
      "06:23:37 [INFO] train episode 4490: winner = -1, steps = 5\n",
      "06:23:44 [INFO] train episode 4491: winner = 1, steps = 6\n",
      "06:24:10 [INFO] train episode 4492: winner = 1, steps = 4\n",
      "06:24:24 [INFO] train episode 4493: winner = 1, steps = 6\n",
      "06:24:35 [INFO] train episode 4494: winner = 0, steps = 8\n",
      "06:24:40 [INFO] train episode 4495: winner = -1, steps = 7\n",
      "06:24:48 [INFO] train episode 4496: winner = 0, steps = 8\n",
      "06:24:54 [INFO] train episode 4497: winner = 0, steps = 8\n",
      "06:25:00 [INFO] train episode 4498: winner = 1, steps = 6\n",
      "06:25:08 [INFO] train episode 4499: winner = -1, steps = 5\n",
      "06:25:08 [INFO] train episode 4500: winner = 1, steps = 6\n",
      "06:25:17 [INFO] train episode 4501: winner = 1, steps = 8\n",
      "06:25:26 [INFO] train episode 4502: winner = 1, steps = 6\n",
      "06:25:30 [INFO] train episode 4503: winner = 1, steps = 4\n",
      "06:25:37 [INFO] train episode 4504: winner = -1, steps = 5\n",
      "06:25:43 [INFO] train episode 4505: winner = 1, steps = 6\n",
      "06:25:48 [INFO] train episode 4506: winner = 1, steps = 4\n",
      "06:25:50 [INFO] train episode 4507: winner = 1, steps = 8\n",
      "06:25:53 [INFO] train episode 4508: winner = 1, steps = 8\n",
      "06:25:57 [INFO] train episode 4509: winner = 1, steps = 4\n",
      "06:26:15 [INFO] train episode 4510: winner = 1, steps = 6\n",
      "06:26:21 [INFO] train episode 4511: winner = 1, steps = 4\n",
      "06:26:24 [INFO] train episode 4512: winner = 1, steps = 4\n",
      "06:26:27 [INFO] train episode 4513: winner = 1, steps = 4\n",
      "06:26:27 [INFO] train episode 4514: winner = 0, steps = 8\n",
      "06:26:28 [INFO] train episode 4515: winner = 1, steps = 6\n",
      "06:26:30 [INFO] train episode 4516: winner = 0, steps = 8\n",
      "06:26:31 [INFO] train episode 4517: winner = 0, steps = 8\n",
      "06:26:32 [INFO] train episode 4518: winner = 1, steps = 6\n",
      "06:26:34 [INFO] train episode 4519: winner = 1, steps = 4\n",
      "06:26:39 [INFO] train episode 4520: winner = 0, steps = 8\n",
      "06:26:48 [INFO] train episode 4521: winner = -1, steps = 7\n",
      "06:26:56 [INFO] train episode 4522: winner = 1, steps = 6\n",
      "06:26:57 [INFO] train episode 4523: winner = 1, steps = 6\n",
      "06:27:07 [INFO] train episode 4524: winner = 0, steps = 8\n",
      "06:27:11 [INFO] train episode 4525: winner = -1, steps = 5\n",
      "06:27:12 [INFO] train episode 4526: winner = 0, steps = 8\n",
      "06:27:16 [INFO] train episode 4527: winner = 1, steps = 6\n",
      "06:27:19 [INFO] train episode 4528: winner = 1, steps = 6\n",
      "06:27:24 [INFO] train episode 4529: winner = 0, steps = 8\n",
      "06:27:28 [INFO] train episode 4530: winner = 0, steps = 8\n",
      "06:27:33 [INFO] train episode 4531: winner = 1, steps = 8\n",
      "06:27:33 [INFO] train episode 4532: winner = 1, steps = 4\n",
      "06:27:34 [INFO] train episode 4533: winner = 0, steps = 8\n",
      "06:27:35 [INFO] train episode 4534: winner = 0, steps = 8\n",
      "06:27:36 [INFO] train episode 4535: winner = 1, steps = 4\n",
      "06:27:38 [INFO] train episode 4536: winner = 1, steps = 4\n",
      "06:27:43 [INFO] train episode 4537: winner = 1, steps = 6\n",
      "06:27:44 [INFO] train episode 4538: winner = 0, steps = 8\n",
      "06:27:45 [INFO] train episode 4539: winner = 1, steps = 8\n",
      "06:27:45 [INFO] train episode 4540: winner = 1, steps = 4\n",
      "06:27:47 [INFO] train episode 4541: winner = -1, steps = 5\n",
      "06:27:48 [INFO] train episode 4542: winner = 1, steps = 6\n",
      "06:27:52 [INFO] train episode 4543: winner = 1, steps = 4\n",
      "06:27:56 [INFO] train episode 4544: winner = 1, steps = 6\n",
      "06:28:00 [INFO] train episode 4545: winner = 1, steps = 4\n",
      "06:28:00 [INFO] train episode 4546: winner = 1, steps = 6\n",
      "06:28:03 [INFO] train episode 4547: winner = 1, steps = 8\n",
      "06:28:11 [INFO] train episode 4548: winner = 0, steps = 8\n",
      "06:28:14 [INFO] train episode 4549: winner = 1, steps = 8\n",
      "06:28:18 [INFO] train episode 4550: winner = 1, steps = 6\n",
      "06:28:22 [INFO] train episode 4551: winner = -1, steps = 7\n",
      "06:28:22 [INFO] train episode 4552: winner = 1, steps = 4\n",
      "06:28:24 [INFO] train episode 4553: winner = 1, steps = 6\n",
      "06:28:28 [INFO] train episode 4554: winner = 0, steps = 8\n",
      "06:28:30 [INFO] train episode 4555: winner = 1, steps = 6\n",
      "06:28:30 [INFO] train episode 4556: winner = 1, steps = 4\n",
      "06:28:31 [INFO] train episode 4557: winner = 1, steps = 6\n",
      "06:28:31 [INFO] train episode 4558: winner = 1, steps = 6\n",
      "06:28:34 [INFO] train episode 4559: winner = 1, steps = 4\n",
      "06:28:34 [INFO] train episode 4560: winner = 1, steps = 6\n",
      "06:28:35 [INFO] train episode 4561: winner = 1, steps = 4\n",
      "06:28:36 [INFO] train episode 4562: winner = 1, steps = 4\n",
      "06:28:37 [INFO] train episode 4563: winner = 0, steps = 8\n",
      "06:28:42 [INFO] train episode 4564: winner = 0, steps = 8\n",
      "06:28:47 [INFO] train episode 4565: winner = 0, steps = 8\n",
      "06:28:47 [INFO] train episode 4566: winner = 1, steps = 4\n",
      "06:28:48 [INFO] train episode 4567: winner = 1, steps = 6\n",
      "06:28:49 [INFO] train episode 4568: winner = 1, steps = 4\n",
      "06:28:50 [INFO] train episode 4569: winner = 1, steps = 4\n",
      "06:28:53 [INFO] train episode 4570: winner = 0, steps = 8\n",
      "06:28:56 [INFO] train episode 4571: winner = 1, steps = 8\n",
      "06:28:57 [INFO] train episode 4572: winner = 1, steps = 6\n",
      "06:28:58 [INFO] train episode 4573: winner = 1, steps = 6\n",
      "06:28:58 [INFO] train episode 4574: winner = 1, steps = 6\n",
      "06:28:58 [INFO] train episode 4575: winner = 0, steps = 8\n",
      "06:29:01 [INFO] train episode 4576: winner = 0, steps = 8\n",
      "06:29:01 [INFO] train episode 4577: winner = 1, steps = 4\n",
      "06:29:03 [INFO] train episode 4578: winner = 0, steps = 8\n",
      "06:29:05 [INFO] train episode 4579: winner = 0, steps = 8\n",
      "06:29:10 [INFO] train episode 4580: winner = -1, steps = 7\n",
      "06:29:10 [INFO] train episode 4581: winner = 1, steps = 4\n",
      "06:29:10 [INFO] train episode 4582: winner = 1, steps = 6\n",
      "06:29:10 [INFO] train episode 4583: winner = 1, steps = 4\n",
      "06:29:10 [INFO] train episode 4584: winner = 1, steps = 4\n",
      "06:29:12 [INFO] train episode 4585: winner = -1, steps = 7\n",
      "06:29:13 [INFO] train episode 4586: winner = -1, steps = 7\n",
      "06:29:14 [INFO] train episode 4587: winner = 1, steps = 6\n",
      "06:29:14 [INFO] train episode 4588: winner = 1, steps = 6\n",
      "06:29:15 [INFO] train episode 4589: winner = 0, steps = 8\n",
      "06:29:15 [INFO] train episode 4590: winner = 1, steps = 4\n",
      "06:29:19 [INFO] train episode 4591: winner = 1, steps = 6\n",
      "06:29:22 [INFO] train episode 4592: winner = -1, steps = 7\n",
      "06:29:23 [INFO] train episode 4593: winner = 1, steps = 8\n",
      "06:29:25 [INFO] train episode 4594: winner = 1, steps = 6\n",
      "06:29:25 [INFO] train episode 4595: winner = 1, steps = 6\n",
      "06:29:26 [INFO] train episode 4596: winner = 1, steps = 6\n",
      "06:29:28 [INFO] train episode 4597: winner = 0, steps = 8\n",
      "06:29:28 [INFO] train episode 4598: winner = 1, steps = 6\n",
      "06:29:30 [INFO] train episode 4599: winner = 1, steps = 6\n",
      "06:29:31 [INFO] train episode 4600: winner = 1, steps = 6\n",
      "06:29:31 [INFO] train episode 4601: winner = 1, steps = 6\n",
      "06:29:32 [INFO] train episode 4602: winner = 1, steps = 6\n",
      "06:29:33 [INFO] train episode 4603: winner = 1, steps = 6\n",
      "06:29:39 [INFO] train episode 4604: winner = 0, steps = 8\n",
      "06:29:43 [INFO] train episode 4605: winner = 1, steps = 4\n",
      "06:29:43 [INFO] train episode 4606: winner = 1, steps = 4\n",
      "06:29:43 [INFO] train episode 4607: winner = 1, steps = 4\n",
      "06:29:43 [INFO] train episode 4608: winner = 1, steps = 6\n",
      "06:29:45 [INFO] train episode 4609: winner = 0, steps = 8\n",
      "06:29:46 [INFO] train episode 4610: winner = 0, steps = 8\n",
      "06:29:47 [INFO] train episode 4611: winner = 1, steps = 6\n",
      "06:29:49 [INFO] train episode 4612: winner = 1, steps = 4\n",
      "06:29:53 [INFO] train episode 4613: winner = 0, steps = 8\n",
      "06:29:53 [INFO] train episode 4614: winner = 1, steps = 6\n",
      "06:29:53 [INFO] train episode 4615: winner = 1, steps = 6\n",
      "06:29:54 [INFO] train episode 4616: winner = 1, steps = 6\n",
      "06:29:54 [INFO] train episode 4617: winner = -1, steps = 7\n",
      "06:29:54 [INFO] train episode 4618: winner = 1, steps = 6\n",
      "06:29:56 [INFO] train episode 4619: winner = -1, steps = 7\n",
      "06:30:01 [INFO] train episode 4620: winner = 0, steps = 8\n",
      "06:30:01 [INFO] test episode 4620:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "06:30:22 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "06:30:41 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+o+\n",
      "+++\n",
      "06:30:55 [INFO] step 2：player 1, action (1, 0)\n",
      "+x+\n",
      "oo+\n",
      "+++\n",
      "06:30:59 [INFO] step 3：player -1, action (1, 2)\n",
      "+x+\n",
      "oox\n",
      "+++\n",
      "06:31:00 [INFO] step 4：player 1, action (2, 0)\n",
      "+x+\n",
      "oox\n",
      "o++\n",
      "06:31:00 [INFO] step 5：player -1, action (2, 1)\n",
      "+x+\n",
      "oox\n",
      "ox+\n",
      "06:31:00 [INFO] step 6：player 1, action (0, 2)\n",
      "+xo\n",
      "oox\n",
      "ox+\n",
      "06:31:00 [INFO] test episode 4620: winner = 1, steps = 6\n",
      "06:31:31 [INFO] train episode 4621: winner = -1, steps = 5\n",
      "06:32:05 [INFO] train episode 4622: winner = 0, steps = 8\n",
      "06:32:21 [INFO] train episode 4623: winner = 1, steps = 4\n",
      "06:32:49 [INFO] train episode 4624: winner = 0, steps = 8\n",
      "06:33:01 [INFO] train episode 4625: winner = 1, steps = 6\n",
      "06:33:21 [INFO] train episode 4626: winner = 1, steps = 4\n",
      "06:33:35 [INFO] train episode 4627: winner = 1, steps = 6\n",
      "06:34:08 [INFO] train episode 4628: winner = 1, steps = 6\n",
      "06:34:14 [INFO] train episode 4629: winner = 1, steps = 4\n",
      "06:34:21 [INFO] train episode 4630: winner = 0, steps = 8\n",
      "06:34:38 [INFO] train episode 4631: winner = 1, steps = 6\n",
      "06:34:58 [INFO] train episode 4632: winner = 0, steps = 8\n",
      "06:35:09 [INFO] train episode 4633: winner = -1, steps = 7\n",
      "06:35:15 [INFO] train episode 4634: winner = 0, steps = 8\n",
      "06:35:17 [INFO] train episode 4635: winner = 1, steps = 6\n",
      "06:35:17 [INFO] train episode 4636: winner = 1, steps = 6\n",
      "06:35:21 [INFO] train episode 4637: winner = -1, steps = 7\n",
      "06:35:32 [INFO] train episode 4638: winner = 0, steps = 8\n",
      "06:35:54 [INFO] train episode 4639: winner = 0, steps = 8\n",
      "06:36:10 [INFO] train episode 4640: winner = 1, steps = 6\n",
      "06:36:16 [INFO] train episode 4641: winner = 0, steps = 8\n",
      "06:36:29 [INFO] train episode 4642: winner = 1, steps = 6\n",
      "06:36:37 [INFO] train episode 4643: winner = 1, steps = 4\n",
      "06:36:41 [INFO] train episode 4644: winner = 1, steps = 6\n",
      "06:36:56 [INFO] train episode 4645: winner = 0, steps = 8\n",
      "06:36:59 [INFO] train episode 4646: winner = 1, steps = 4\n",
      "06:37:01 [INFO] train episode 4647: winner = 0, steps = 8\n",
      "06:37:09 [INFO] train episode 4648: winner = 0, steps = 8\n",
      "06:37:11 [INFO] train episode 4649: winner = 1, steps = 4\n",
      "06:37:13 [INFO] train episode 4650: winner = 1, steps = 6\n",
      "06:37:20 [INFO] train episode 4651: winner = 1, steps = 6\n",
      "06:37:23 [INFO] train episode 4652: winner = 1, steps = 4\n",
      "06:37:29 [INFO] train episode 4653: winner = 0, steps = 8\n",
      "06:37:35 [INFO] train episode 4654: winner = -1, steps = 5\n",
      "06:37:35 [INFO] train episode 4655: winner = 1, steps = 4\n",
      "06:37:44 [INFO] train episode 4656: winner = 1, steps = 6\n",
      "06:37:47 [INFO] train episode 4657: winner = 0, steps = 8\n",
      "06:37:52 [INFO] train episode 4658: winner = 0, steps = 8\n",
      "06:37:54 [INFO] train episode 4659: winner = 0, steps = 8\n",
      "06:37:56 [INFO] train episode 4660: winner = 1, steps = 4\n",
      "06:37:59 [INFO] train episode 4661: winner = -1, steps = 7\n",
      "06:37:59 [INFO] train episode 4662: winner = 1, steps = 6\n",
      "06:38:00 [INFO] train episode 4663: winner = 0, steps = 8\n",
      "06:38:04 [INFO] train episode 4664: winner = 0, steps = 8\n",
      "06:38:07 [INFO] train episode 4665: winner = -1, steps = 7\n",
      "06:38:07 [INFO] train episode 4666: winner = -1, steps = 5\n",
      "06:38:07 [INFO] train episode 4667: winner = 1, steps = 4\n",
      "06:38:08 [INFO] train episode 4668: winner = -1, steps = 7\n",
      "06:38:10 [INFO] train episode 4669: winner = 1, steps = 6\n",
      "06:38:11 [INFO] train episode 4670: winner = -1, steps = 5\n",
      "06:38:12 [INFO] train episode 4671: winner = 1, steps = 6\n",
      "06:38:19 [INFO] train episode 4672: winner = -1, steps = 5\n",
      "06:38:26 [INFO] train episode 4673: winner = 0, steps = 8\n",
      "06:38:29 [INFO] train episode 4674: winner = 1, steps = 6\n",
      "06:38:32 [INFO] train episode 4675: winner = 1, steps = 6\n",
      "06:38:34 [INFO] train episode 4676: winner = 0, steps = 8\n",
      "06:38:35 [INFO] train episode 4677: winner = 1, steps = 6\n",
      "06:38:37 [INFO] train episode 4678: winner = 0, steps = 8\n",
      "06:38:43 [INFO] train episode 4679: winner = 0, steps = 8\n",
      "06:38:44 [INFO] train episode 4680: winner = 1, steps = 8\n",
      "06:38:48 [INFO] train episode 4681: winner = 0, steps = 8\n",
      "06:38:48 [INFO] train episode 4682: winner = 1, steps = 6\n",
      "06:38:51 [INFO] train episode 4683: winner = 1, steps = 6\n",
      "06:38:54 [INFO] train episode 4684: winner = 0, steps = 8\n",
      "06:38:55 [INFO] train episode 4685: winner = -1, steps = 5\n",
      "06:39:00 [INFO] train episode 4686: winner = 0, steps = 8\n",
      "06:39:01 [INFO] train episode 4687: winner = 1, steps = 4\n",
      "06:39:03 [INFO] train episode 4688: winner = -1, steps = 5\n",
      "06:39:03 [INFO] train episode 4689: winner = 1, steps = 6\n",
      "06:39:04 [INFO] train episode 4690: winner = 0, steps = 8\n",
      "06:39:08 [INFO] train episode 4691: winner = 0, steps = 8\n",
      "06:39:08 [INFO] train episode 4692: winner = -1, steps = 5\n",
      "06:39:11 [INFO] train episode 4693: winner = 0, steps = 8\n",
      "06:39:12 [INFO] train episode 4694: winner = 1, steps = 6\n",
      "06:39:12 [INFO] train episode 4695: winner = 1, steps = 6\n",
      "06:39:13 [INFO] train episode 4696: winner = 1, steps = 6\n",
      "06:39:15 [INFO] train episode 4697: winner = 1, steps = 4\n",
      "06:39:15 [INFO] train episode 4698: winner = 1, steps = 6\n",
      "06:39:23 [INFO] train episode 4699: winner = 0, steps = 8\n",
      "06:39:23 [INFO] train episode 4700: winner = 0, steps = 8\n",
      "06:39:23 [INFO] train episode 4701: winner = 0, steps = 8\n",
      "06:39:24 [INFO] train episode 4702: winner = 1, steps = 6\n",
      "06:39:24 [INFO] train episode 4703: winner = 1, steps = 4\n",
      "06:39:25 [INFO] train episode 4704: winner = 1, steps = 8\n",
      "06:39:27 [INFO] train episode 4705: winner = 0, steps = 8\n",
      "06:39:27 [INFO] train episode 4706: winner = 0, steps = 8\n",
      "06:39:31 [INFO] train episode 4707: winner = 1, steps = 4\n",
      "06:39:34 [INFO] train episode 4708: winner = 0, steps = 8\n",
      "06:39:39 [INFO] train episode 4709: winner = 0, steps = 8\n",
      "06:39:40 [INFO] train episode 4710: winner = 0, steps = 8\n",
      "06:39:41 [INFO] train episode 4711: winner = 1, steps = 6\n",
      "06:39:43 [INFO] train episode 4712: winner = 1, steps = 4\n",
      "06:39:44 [INFO] train episode 4713: winner = 0, steps = 8\n",
      "06:39:46 [INFO] train episode 4714: winner = 0, steps = 8\n",
      "06:39:47 [INFO] train episode 4715: winner = -1, steps = 5\n",
      "06:39:51 [INFO] train episode 4716: winner = -1, steps = 7\n",
      "06:39:58 [INFO] train episode 4717: winner = 1, steps = 6\n",
      "06:39:58 [INFO] train episode 4718: winner = 1, steps = 6\n",
      "06:39:58 [INFO] train episode 4719: winner = 1, steps = 6\n",
      "06:39:59 [INFO] train episode 4720: winner = 0, steps = 8\n",
      "06:39:59 [INFO] train episode 4721: winner = 0, steps = 8\n",
      "06:40:00 [INFO] train episode 4722: winner = 1, steps = 4\n",
      "06:40:06 [INFO] train episode 4723: winner = -1, steps = 7\n",
      "06:40:08 [INFO] train episode 4724: winner = 1, steps = 6\n",
      "06:40:08 [INFO] train episode 4725: winner = 1, steps = 6\n",
      "06:40:08 [INFO] train episode 4726: winner = 1, steps = 8\n",
      "06:40:09 [INFO] train episode 4727: winner = 0, steps = 8\n",
      "06:40:10 [INFO] train episode 4728: winner = 0, steps = 8\n",
      "06:40:10 [INFO] train episode 4729: winner = 1, steps = 6\n",
      "06:40:10 [INFO] train episode 4730: winner = 0, steps = 8\n",
      "06:40:11 [INFO] train episode 4731: winner = -1, steps = 7\n",
      "06:40:13 [INFO] train episode 4732: winner = 0, steps = 8\n",
      "06:40:13 [INFO] train episode 4733: winner = 0, steps = 8\n",
      "06:40:15 [INFO] train episode 4734: winner = 1, steps = 8\n",
      "06:40:15 [INFO] train episode 4735: winner = 1, steps = 4\n",
      "06:40:16 [INFO] train episode 4736: winner = 1, steps = 4\n",
      "06:40:16 [INFO] train episode 4737: winner = 0, steps = 8\n",
      "06:40:17 [INFO] train episode 4738: winner = 1, steps = 4\n",
      "06:40:18 [INFO] train episode 4739: winner = 1, steps = 8\n",
      "06:40:20 [INFO] train episode 4740: winner = -1, steps = 5\n",
      "06:40:26 [INFO] train episode 4741: winner = 1, steps = 8\n",
      "06:40:26 [INFO] train episode 4742: winner = 1, steps = 4\n",
      "06:40:27 [INFO] train episode 4743: winner = 1, steps = 8\n",
      "06:40:29 [INFO] train episode 4744: winner = -1, steps = 5\n",
      "06:40:30 [INFO] train episode 4745: winner = 0, steps = 8\n",
      "06:40:30 [INFO] train episode 4746: winner = 0, steps = 8\n",
      "06:40:33 [INFO] train episode 4747: winner = 0, steps = 8\n",
      "06:40:34 [INFO] train episode 4748: winner = 0, steps = 8\n",
      "06:40:35 [INFO] train episode 4749: winner = 1, steps = 4\n",
      "06:40:35 [INFO] train episode 4750: winner = -1, steps = 7\n",
      "06:40:38 [INFO] train episode 4751: winner = 1, steps = 6\n",
      "06:40:38 [INFO] train episode 4752: winner = 0, steps = 8\n",
      "06:40:39 [INFO] train episode 4753: winner = 1, steps = 6\n",
      "06:40:39 [INFO] test episode 4753:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "06:41:01 [INFO] step 0：player 1, action (1, 1)\n",
      "+++\n",
      "+o+\n",
      "+++\n",
      "06:41:20 [INFO] step 1：player -1, action (0, 2)\n",
      "++x\n",
      "+o+\n",
      "+++\n",
      "06:41:34 [INFO] step 2：player 1, action (1, 2)\n",
      "++x\n",
      "+oo\n",
      "+++\n",
      "06:41:39 [INFO] step 3：player -1, action (1, 0)\n",
      "++x\n",
      "xoo\n",
      "+++\n",
      "06:41:39 [INFO] step 4：player 1, action (0, 0)\n",
      "o+x\n",
      "xoo\n",
      "+++\n",
      "06:41:40 [INFO] step 5：player -1, action (2, 1)\n",
      "o+x\n",
      "xoo\n",
      "+x+\n",
      "06:41:40 [INFO] step 6：player 1, action (2, 0)\n",
      "o+x\n",
      "xoo\n",
      "ox+\n",
      "06:41:40 [INFO] step 7：player -1, action (2, 2)\n",
      "o+x\n",
      "xoo\n",
      "oxx\n",
      "06:41:40 [INFO] step 8：player 1, action (0, 1)\n",
      "oox\n",
      "xoo\n",
      "oxx\n",
      "06:41:40 [INFO] test episode 4753: winner = 0, steps = 8\n",
      "06:42:13 [INFO] train episode 4754: winner = -1, steps = 7\n",
      "06:42:36 [INFO] train episode 4755: winner = 0, steps = 8\n",
      "06:42:57 [INFO] train episode 4756: winner = 1, steps = 4\n",
      "06:43:06 [INFO] train episode 4757: winner = 1, steps = 6\n",
      "06:43:36 [INFO] train episode 4758: winner = 1, steps = 6\n",
      "06:43:41 [INFO] train episode 4759: winner = 0, steps = 8\n",
      "06:44:15 [INFO] train episode 4760: winner = -1, steps = 7\n",
      "06:44:22 [INFO] train episode 4761: winner = 1, steps = 4\n",
      "06:44:43 [INFO] train episode 4762: winner = 1, steps = 4\n",
      "06:45:09 [INFO] train episode 4763: winner = 1, steps = 6\n",
      "06:45:14 [INFO] train episode 4764: winner = 1, steps = 4\n",
      "06:45:27 [INFO] train episode 4765: winner = 1, steps = 6\n",
      "06:45:48 [INFO] train episode 4766: winner = 1, steps = 4\n",
      "06:46:03 [INFO] train episode 4767: winner = 0, steps = 8\n",
      "06:46:10 [INFO] train episode 4768: winner = 1, steps = 6\n",
      "06:46:32 [INFO] train episode 4769: winner = 1, steps = 6\n",
      "06:46:38 [INFO] train episode 4770: winner = 0, steps = 8\n",
      "06:46:42 [INFO] train episode 4771: winner = 1, steps = 4\n",
      "06:46:50 [INFO] train episode 4772: winner = 1, steps = 4\n",
      "06:47:01 [INFO] train episode 4773: winner = 0, steps = 8\n",
      "06:47:19 [INFO] train episode 4774: winner = 0, steps = 8\n",
      "06:47:21 [INFO] train episode 4775: winner = 1, steps = 6\n",
      "06:47:23 [INFO] train episode 4776: winner = 1, steps = 6\n",
      "06:47:26 [INFO] train episode 4777: winner = -1, steps = 5\n",
      "06:47:29 [INFO] train episode 4778: winner = 1, steps = 4\n",
      "06:47:33 [INFO] train episode 4779: winner = 1, steps = 4\n",
      "06:47:42 [INFO] train episode 4780: winner = 1, steps = 4\n",
      "06:47:46 [INFO] train episode 4781: winner = -1, steps = 7\n",
      "06:47:49 [INFO] train episode 4782: winner = 0, steps = 8\n",
      "06:47:50 [INFO] train episode 4783: winner = 1, steps = 6\n",
      "06:47:54 [INFO] train episode 4784: winner = 1, steps = 6\n",
      "06:47:55 [INFO] train episode 4785: winner = 0, steps = 8\n",
      "06:47:57 [INFO] train episode 4786: winner = 1, steps = 6\n",
      "06:48:05 [INFO] train episode 4787: winner = 0, steps = 8\n",
      "06:48:07 [INFO] train episode 4788: winner = 0, steps = 8\n",
      "06:48:07 [INFO] train episode 4789: winner = 1, steps = 4\n",
      "06:48:08 [INFO] train episode 4790: winner = 1, steps = 4\n",
      "06:48:08 [INFO] train episode 4791: winner = 1, steps = 6\n",
      "06:48:16 [INFO] train episode 4792: winner = -1, steps = 5\n",
      "06:48:16 [INFO] train episode 4793: winner = 1, steps = 6\n",
      "06:48:22 [INFO] train episode 4794: winner = -1, steps = 5\n",
      "06:48:25 [INFO] train episode 4795: winner = -1, steps = 5\n",
      "06:48:32 [INFO] train episode 4796: winner = 0, steps = 8\n",
      "06:48:33 [INFO] train episode 4797: winner = 1, steps = 4\n",
      "06:48:35 [INFO] train episode 4798: winner = 1, steps = 4\n",
      "06:48:35 [INFO] train episode 4799: winner = 1, steps = 4\n",
      "06:48:39 [INFO] train episode 4800: winner = 0, steps = 8\n",
      "06:48:49 [INFO] train episode 4801: winner = 1, steps = 6\n",
      "06:48:51 [INFO] train episode 4802: winner = 1, steps = 4\n",
      "06:48:52 [INFO] train episode 4803: winner = 1, steps = 4\n",
      "06:48:55 [INFO] train episode 4804: winner = -1, steps = 5\n",
      "06:48:56 [INFO] train episode 4805: winner = 1, steps = 4\n",
      "06:48:59 [INFO] train episode 4806: winner = 1, steps = 6\n",
      "06:48:59 [INFO] train episode 4807: winner = 1, steps = 6\n",
      "06:49:00 [INFO] train episode 4808: winner = 1, steps = 6\n",
      "06:49:04 [INFO] train episode 4809: winner = -1, steps = 5\n",
      "06:49:05 [INFO] train episode 4810: winner = 1, steps = 6\n",
      "06:49:07 [INFO] train episode 4811: winner = 1, steps = 6\n",
      "06:49:13 [INFO] train episode 4812: winner = 0, steps = 8\n",
      "06:49:16 [INFO] train episode 4813: winner = 0, steps = 8\n",
      "06:49:17 [INFO] train episode 4814: winner = 1, steps = 4\n",
      "06:49:21 [INFO] train episode 4815: winner = 1, steps = 6\n",
      "06:49:23 [INFO] train episode 4816: winner = 1, steps = 6\n",
      "06:49:26 [INFO] train episode 4817: winner = 1, steps = 8\n",
      "06:49:35 [INFO] train episode 4818: winner = -1, steps = 7\n",
      "06:49:39 [INFO] train episode 4819: winner = 0, steps = 8\n",
      "06:49:43 [INFO] train episode 4820: winner = 1, steps = 6\n",
      "06:49:46 [INFO] train episode 4821: winner = -1, steps = 7\n",
      "06:49:48 [INFO] train episode 4822: winner = 1, steps = 6\n",
      "06:49:52 [INFO] train episode 4823: winner = 1, steps = 8\n",
      "06:49:55 [INFO] train episode 4824: winner = -1, steps = 5\n",
      "06:50:00 [INFO] train episode 4825: winner = 1, steps = 4\n",
      "06:50:00 [INFO] train episode 4826: winner = 0, steps = 8\n",
      "06:50:01 [INFO] train episode 4827: winner = 1, steps = 4\n",
      "06:50:01 [INFO] train episode 4828: winner = 1, steps = 8\n",
      "06:50:03 [INFO] train episode 4829: winner = -1, steps = 5\n",
      "06:50:05 [INFO] train episode 4830: winner = 1, steps = 4\n",
      "06:50:08 [INFO] train episode 4831: winner = 1, steps = 6\n",
      "06:50:08 [INFO] train episode 4832: winner = -1, steps = 5\n",
      "06:50:10 [INFO] train episode 4833: winner = 1, steps = 6\n",
      "06:50:11 [INFO] train episode 4834: winner = 1, steps = 6\n",
      "06:50:11 [INFO] train episode 4835: winner = 1, steps = 6\n",
      "06:50:12 [INFO] train episode 4836: winner = 1, steps = 8\n",
      "06:50:13 [INFO] train episode 4837: winner = 0, steps = 8\n",
      "06:50:13 [INFO] train episode 4838: winner = 1, steps = 6\n",
      "06:50:14 [INFO] train episode 4839: winner = 0, steps = 8\n",
      "06:50:15 [INFO] train episode 4840: winner = 1, steps = 8\n",
      "06:50:16 [INFO] train episode 4841: winner = -1, steps = 5\n",
      "06:50:18 [INFO] train episode 4842: winner = -1, steps = 7\n",
      "06:50:18 [INFO] train episode 4843: winner = 1, steps = 6\n",
      "06:50:18 [INFO] train episode 4844: winner = 1, steps = 4\n",
      "06:50:20 [INFO] train episode 4845: winner = 0, steps = 8\n",
      "06:50:24 [INFO] train episode 4846: winner = 0, steps = 8\n",
      "06:50:29 [INFO] train episode 4847: winner = 1, steps = 4\n",
      "06:50:29 [INFO] train episode 4848: winner = 0, steps = 8\n",
      "06:50:32 [INFO] train episode 4849: winner = 1, steps = 4\n",
      "06:50:34 [INFO] train episode 4850: winner = 0, steps = 8\n",
      "06:50:36 [INFO] train episode 4851: winner = 0, steps = 8\n",
      "06:50:37 [INFO] train episode 4852: winner = 1, steps = 6\n",
      "06:50:39 [INFO] train episode 4853: winner = 1, steps = 4\n",
      "06:50:39 [INFO] train episode 4854: winner = 1, steps = 4\n",
      "06:50:41 [INFO] train episode 4855: winner = 1, steps = 4\n",
      "06:50:42 [INFO] train episode 4856: winner = -1, steps = 7\n",
      "06:50:44 [INFO] train episode 4857: winner = 1, steps = 4\n",
      "06:50:48 [INFO] train episode 4858: winner = 0, steps = 8\n",
      "06:50:49 [INFO] train episode 4859: winner = -1, steps = 5\n",
      "06:50:52 [INFO] train episode 4860: winner = 0, steps = 8\n",
      "06:50:52 [INFO] train episode 4861: winner = 1, steps = 6\n",
      "06:50:54 [INFO] train episode 4862: winner = 1, steps = 6\n",
      "06:50:56 [INFO] train episode 4863: winner = 1, steps = 6\n",
      "06:50:57 [INFO] train episode 4864: winner = 0, steps = 8\n",
      "06:50:57 [INFO] train episode 4865: winner = 1, steps = 4\n",
      "06:51:02 [INFO] train episode 4866: winner = 0, steps = 8\n",
      "06:51:02 [INFO] train episode 4867: winner = 1, steps = 4\n",
      "06:51:03 [INFO] train episode 4868: winner = 1, steps = 6\n",
      "06:51:03 [INFO] train episode 4869: winner = 1, steps = 6\n",
      "06:51:09 [INFO] train episode 4870: winner = 0, steps = 8\n",
      "06:51:09 [INFO] train episode 4871: winner = 1, steps = 4\n",
      "06:51:09 [INFO] train episode 4872: winner = -1, steps = 5\n",
      "06:51:13 [INFO] train episode 4873: winner = 0, steps = 8\n",
      "06:51:14 [INFO] train episode 4874: winner = 1, steps = 4\n",
      "06:51:15 [INFO] train episode 4875: winner = -1, steps = 7\n",
      "06:51:16 [INFO] train episode 4876: winner = 1, steps = 6\n",
      "06:51:16 [INFO] train episode 4877: winner = 1, steps = 6\n",
      "06:51:18 [INFO] train episode 4878: winner = 0, steps = 8\n",
      "06:51:19 [INFO] train episode 4879: winner = 0, steps = 8\n",
      "06:51:22 [INFO] train episode 4880: winner = 0, steps = 8\n",
      "06:51:22 [INFO] train episode 4881: winner = 1, steps = 6\n",
      "06:51:23 [INFO] train episode 4882: winner = -1, steps = 5\n",
      "06:51:23 [INFO] train episode 4883: winner = 1, steps = 6\n",
      "06:51:25 [INFO] train episode 4884: winner = 1, steps = 6\n",
      "06:51:26 [INFO] train episode 4885: winner = 1, steps = 6\n",
      "06:51:27 [INFO] train episode 4886: winner = 0, steps = 8\n",
      "06:51:30 [INFO] train episode 4887: winner = 0, steps = 8\n",
      "06:51:30 [INFO] train episode 4888: winner = -1, steps = 5\n",
      "06:51:31 [INFO] train episode 4889: winner = 1, steps = 8\n",
      "06:51:35 [INFO] train episode 4890: winner = 0, steps = 8\n",
      "06:51:35 [INFO] train episode 4891: winner = 0, steps = 8\n",
      "06:51:35 [INFO] train episode 4892: winner = 0, steps = 8\n",
      "06:51:36 [INFO] train episode 4893: winner = 0, steps = 8\n",
      "06:51:39 [INFO] train episode 4894: winner = 0, steps = 8\n",
      "06:51:39 [INFO] test episode 4894:\n",
      "+++\n",
      "+++\n",
      "+++\n",
      "06:52:01 [INFO] step 0：player 1, action (2, 2)\n",
      "+++\n",
      "+++\n",
      "++o\n",
      "06:52:20 [INFO] step 1：player -1, action (0, 1)\n",
      "+x+\n",
      "+++\n",
      "++o\n",
      "06:52:35 [INFO] step 2：player 1, action (1, 2)\n",
      "+x+\n",
      "++o\n",
      "++o\n",
      "06:52:38 [INFO] step 3：player -1, action (0, 2)\n",
      "+xx\n",
      "++o\n",
      "++o\n",
      "06:52:38 [INFO] step 4：player 1, action (0, 0)\n",
      "oxx\n",
      "++o\n",
      "++o\n",
      "06:52:38 [INFO] step 5：player -1, action (1, 1)\n",
      "oxx\n",
      "+xo\n",
      "++o\n",
      "06:52:38 [INFO] step 6：player 1, action (2, 0)\n",
      "oxx\n",
      "+xo\n",
      "o+o\n",
      "06:52:38 [INFO] step 7：player -1, action (2, 1)\n",
      "oxx\n",
      "+xo\n",
      "oxo\n",
      "06:52:38 [INFO] test episode 4894: winner = -1, steps = 7\n",
      "06:53:13 [INFO] train episode 4895: winner = -1, steps = 7\n",
      "06:53:44 [INFO] train episode 4896: winner = 1, steps = 8\n",
      "06:54:05 [INFO] train episode 4897: winner = 1, steps = 4\n",
      "06:54:41 [INFO] train episode 4898: winner = 1, steps = 4\n",
      "06:55:04 [INFO] train episode 4899: winner = 0, steps = 8\n",
      "06:55:05 [INFO] train episode 4900: winner = 0, steps = 8\n",
      "06:55:21 [INFO] train episode 4901: winner = 0, steps = 8\n",
      "06:55:38 [INFO] train episode 4902: winner = -1, steps = 5\n",
      "06:56:00 [INFO] train episode 4903: winner = 1, steps = 4\n",
      "06:56:13 [INFO] train episode 4904: winner = 0, steps = 8\n",
      "06:56:22 [INFO] train episode 4905: winner = 0, steps = 8\n",
      "06:56:33 [INFO] train episode 4906: winner = -1, steps = 7\n",
      "06:56:35 [INFO] train episode 4907: winner = 1, steps = 4\n",
      "06:56:35 [INFO] train episode 4908: winner = 1, steps = 6\n",
      "06:56:54 [INFO] train episode 4909: winner = 0, steps = 8\n",
      "06:56:54 [INFO] train episode 4910: winner = 1, steps = 8\n",
      "06:57:07 [INFO] train episode 4911: winner = 0, steps = 8\n",
      "06:57:11 [INFO] train episode 4912: winner = 0, steps = 8\n",
      "06:57:18 [INFO] train episode 4913: winner = -1, steps = 7\n",
      "06:57:33 [INFO] train episode 4914: winner = 0, steps = 8\n",
      "06:57:38 [INFO] train episode 4915: winner = 1, steps = 4\n",
      "06:57:43 [INFO] train episode 4916: winner = 1, steps = 4\n",
      "06:57:49 [INFO] train episode 4917: winner = -1, steps = 5\n",
      "06:57:51 [INFO] train episode 4918: winner = 1, steps = 4\n",
      "06:57:57 [INFO] train episode 4919: winner = 0, steps = 8\n",
      "06:58:01 [INFO] train episode 4920: winner = 1, steps = 6\n",
      "06:58:10 [INFO] train episode 4921: winner = -1, steps = 7\n",
      "06:58:12 [INFO] train episode 4922: winner = -1, steps = 5\n",
      "06:58:15 [INFO] train episode 4923: winner = 1, steps = 4\n",
      "06:58:21 [INFO] train episode 4924: winner = 1, steps = 6\n",
      "06:58:24 [INFO] train episode 4925: winner = 0, steps = 8\n",
      "06:58:31 [INFO] train episode 4926: winner = 0, steps = 8\n",
      "06:58:32 [INFO] train episode 4927: winner = 1, steps = 4\n",
      "06:58:33 [INFO] train episode 4928: winner = 0, steps = 8\n",
      "06:58:38 [INFO] train episode 4929: winner = 0, steps = 8\n",
      "06:58:59 [INFO] train episode 4930: winner = -1, steps = 7\n",
      "06:59:00 [INFO] train episode 4931: winner = 1, steps = 4\n",
      "06:59:01 [INFO] train episode 4932: winner = 1, steps = 6\n",
      "06:59:01 [INFO] train episode 4933: winner = 1, steps = 4\n",
      "06:59:03 [INFO] train episode 4934: winner = 1, steps = 4\n",
      "06:59:09 [INFO] train episode 4935: winner = 1, steps = 4\n",
      "06:59:12 [INFO] train episode 4936: winner = -1, steps = 7\n",
      "06:59:13 [INFO] train episode 4937: winner = 1, steps = 4\n",
      "06:59:16 [INFO] train episode 4938: winner = 1, steps = 4\n",
      "06:59:20 [INFO] train episode 4939: winner = 0, steps = 8\n",
      "06:59:21 [INFO] train episode 4940: winner = 0, steps = 8\n",
      "06:59:25 [INFO] train episode 4941: winner = 1, steps = 6\n",
      "06:59:29 [INFO] train episode 4942: winner = 1, steps = 4\n",
      "06:59:31 [INFO] train episode 4943: winner = 1, steps = 4\n",
      "06:59:49 [INFO] train episode 4944: winner = 1, steps = 8\n",
      "06:59:50 [INFO] train episode 4945: winner = 1, steps = 4\n",
      "06:59:53 [INFO] train episode 4946: winner = -1, steps = 5\n",
      "06:59:58 [INFO] train episode 4947: winner = 1, steps = 8\n",
      "06:59:58 [INFO] train episode 4948: winner = 1, steps = 6\n",
      "07:00:02 [INFO] train episode 4949: winner = 1, steps = 4\n",
      "07:00:05 [INFO] train episode 4950: winner = -1, steps = 5\n",
      "07:00:07 [INFO] train episode 4951: winner = 1, steps = 6\n",
      "07:00:08 [INFO] train episode 4952: winner = -1, steps = 7\n",
      "07:00:10 [INFO] train episode 4953: winner = 1, steps = 6\n",
      "07:00:13 [INFO] train episode 4954: winner = 1, steps = 6\n",
      "07:00:16 [INFO] train episode 4955: winner = 1, steps = 4\n",
      "07:00:20 [INFO] train episode 4956: winner = -1, steps = 7\n",
      "07:00:20 [INFO] train episode 4957: winner = 1, steps = 4\n",
      "07:00:21 [INFO] train episode 4958: winner = 1, steps = 8\n",
      "07:00:23 [INFO] train episode 4959: winner = 0, steps = 8\n",
      "07:00:23 [INFO] train episode 4960: winner = 1, steps = 4\n",
      "07:00:27 [INFO] train episode 4961: winner = 1, steps = 6\n",
      "07:00:28 [INFO] train episode 4962: winner = 1, steps = 6\n",
      "07:00:39 [INFO] train episode 4963: winner = 0, steps = 8\n",
      "07:00:39 [INFO] train episode 4964: winner = 0, steps = 8\n",
      "07:00:42 [INFO] train episode 4965: winner = 1, steps = 6\n",
      "07:00:43 [INFO] train episode 4966: winner = 1, steps = 6\n",
      "07:00:43 [INFO] train episode 4967: winner = 0, steps = 8\n",
      "07:00:43 [INFO] train episode 4968: winner = 1, steps = 4\n",
      "07:00:46 [INFO] train episode 4969: winner = 0, steps = 8\n",
      "07:00:49 [INFO] train episode 4970: winner = 0, steps = 8\n",
      "07:00:53 [INFO] train episode 4971: winner = 0, steps = 8\n",
      "07:00:56 [INFO] train episode 4972: winner = 1, steps = 6\n",
      "07:01:03 [INFO] train episode 4973: winner = 1, steps = 8\n",
      "07:01:03 [INFO] train episode 4974: winner = 1, steps = 4\n",
      "07:01:06 [INFO] train episode 4975: winner = 1, steps = 4\n",
      "07:01:08 [INFO] train episode 4976: winner = 1, steps = 6\n",
      "07:01:12 [INFO] train episode 4977: winner = 0, steps = 8\n",
      "07:01:17 [INFO] train episode 4978: winner = -1, steps = 7\n",
      "07:01:19 [INFO] train episode 4979: winner = -1, steps = 7\n",
      "07:01:22 [INFO] train episode 4980: winner = 1, steps = 6\n",
      "07:01:22 [INFO] train episode 4981: winner = 1, steps = 4\n",
      "07:01:23 [INFO] train episode 4982: winner = -1, steps = 5\n",
      "07:01:25 [INFO] train episode 4983: winner = 0, steps = 8\n",
      "07:01:26 [INFO] train episode 4984: winner = -1, steps = 5\n",
      "07:01:27 [INFO] train episode 4985: winner = 1, steps = 6\n",
      "07:01:30 [INFO] train episode 4986: winner = 0, steps = 8\n",
      "07:01:31 [INFO] train episode 4987: winner = -1, steps = 7\n",
      "07:01:32 [INFO] train episode 4988: winner = 0, steps = 8\n",
      "07:01:33 [INFO] train episode 4989: winner = 1, steps = 8\n",
      "07:01:38 [INFO] train episode 4990: winner = 0, steps = 8\n",
      "07:01:38 [INFO] train episode 4991: winner = 1, steps = 4\n",
      "07:01:38 [INFO] train episode 4992: winner = 1, steps = 4\n",
      "07:01:39 [INFO] train episode 4993: winner = 1, steps = 6\n",
      "07:01:40 [INFO] train episode 4994: winner = 0, steps = 8\n",
      "07:01:42 [INFO] train episode 4995: winner = 1, steps = 6\n",
      "07:01:42 [INFO] train episode 4996: winner = 1, steps = 6\n",
      "07:01:44 [INFO] train episode 4997: winner = 1, steps = 6\n",
      "07:01:46 [INFO] train episode 4998: winner = 0, steps = 8\n",
      "07:01:48 [INFO] train episode 4999: winner = 0, steps = 8\n"
     ]
    }
   ],
   "source": [
    "def play_boardgame2_episode(env, agent, mode=None, verbose=False):\n",
    "    \"\"\"Play one episode of a boardgame2 environment, driven by one agent.\n",
    "\n",
    "    Args:\n",
    "        env: environment whose reset() returns an observation and whose\n",
    "            step(action) returns (observation, winner, done, info).\n",
    "        agent: object providing reset(mode=...), step(observation, winner,\n",
    "            done) -> action, and close(); it is asked for every move.\n",
    "        mode: forwarded unchanged to agent.reset().\n",
    "        verbose: if True, print the board before every move and after the\n",
    "            final one, and log each chosen action. The observation is then\n",
    "            unpacked as (board, player).\n",
    "\n",
    "    Returns:\n",
    "        (winner, elapsed_steps): the winner reported by the last env.step()\n",
    "        call and the number of env.step() calls made, including the\n",
    "        terminal one.\n",
    "    \"\"\"\n",
    "    observation, winner, done = env.reset(), 0, False\n",
    "    agent.reset(mode=mode)\n",
    "    elapsed_steps = 0\n",
    "    while True:\n",
    "        if verbose:\n",
    "            board, player = observation\n",
    "            print(boardgame2.strfboard(board))\n",
    "        action = agent.step(observation, winner, done)\n",
    "        if verbose:\n",
    "            # elapsed_steps is still the 0-based index of this move here.\n",
    "            logging.info('step %d：player %d, action %s', elapsed_steps, player,\n",
    "                    action)\n",
    "        observation, winner, done, _ = env.step(action)\n",
    "        # Count the move before testing done so the terminal move is included\n",
    "        # (previously the break skipped the increment, reporting N - 1).\n",
    "        elapsed_steps += 1\n",
    "        if done:\n",
    "            if verbose:\n",
    "                board, _ = observation\n",
    "                print(boardgame2.strfboard(board))\n",
    "            break\n",
    "    agent.close()\n",
    "    return winner, elapsed_steps\n",
    "\n",
    "\n",
    "# Self-play training for 5000 episodes; a verbose test game is played\n",
    "# whenever the replay memory is empty right after a training episode.\n",
    "for episode in range(5000):\n",
    "    winner, elapsed_steps = play_boardgame2_episode(\n",
    "            env, agent, mode='train')\n",
    "    logging.info('train episode %d: winner = %d, steps = %d',\n",
    "            episode, winner, elapsed_steps)\n",
    "\n",
    "    if len(agent.replayer.memory) != 0:\n",
    "        continue  # replay memory still holds data: keep training\n",
    "\n",
    "    # Empty replay memory is treated as the agent having just finished\n",
    "    # learning (presumably it clears the memory afterwards; confirm in agent).\n",
    "    logging.info('test episode %d:', episode)\n",
    "    winner, elapsed_steps = play_boardgame2_episode(\n",
    "            env, agent, mode='test', verbose=True)\n",
    "    logging.info('test episode %d: winner = %d, steps = %d',\n",
    "            episode, winner, elapsed_steps)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
